FP32 IMAGIC variants of scalar QC8/QS8/QU8 GEMM/IGEMM/DWCONV microkernels

PiperOrigin-RevId: 419687973
diff --git a/test/qc8-dwconv-minmax-fp32.cc b/test/qc8-dwconv-minmax-fp32.cc
index 1103df3..d30f371 100644
--- a/test/qc8-dwconv-minmax-fp32.cc
+++ b/test/qc8-dwconv-minmax-fp32.cc
@@ -11834,468 +11834,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
-  DWConvMicrokernelTester()
-    .cr(1)
-    .kr(9)
-    .channels(1)
-    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(1)
-      .width(5)
-      .output_stride(7)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 2; channels < 16; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .input_offset(48)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 2; channels < 16; channels += 3) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(9)
-        .channels(channels)
-        .input_offset(48)
-        .zero_index(mz)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
-  DWConvMicrokernelTester()
-    .cr(2)
-    .kr(9)
-    .channels(2)
-    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
-  for (uint32_t channels = 1; channels < 2; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(2)
-      .width(5)
-      .output_stride(13)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .input_offset(80)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 4; channels < 32; channels += 6) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(9)
-        .channels(channels)
-        .input_offset(80)
-        .zero_index(mz)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
-  DWConvMicrokernelTester()
-    .cr(4)
-    .kr(9)
-    .channels(4)
-    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
-  for (uint32_t channels = 1; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(4)
-      .width(5)
-      .output_stride(23)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .input_offset(112)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 8; channels < 64; channels += 12) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(9)
-        .channels(channels)
-        .input_offset(112)
-        .zero_index(mz)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
 TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_eq_1) {
   DWConvMicrokernelTester()
     .cr(1)
@@ -12758,6 +12296,930 @@
   }
 }
 
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(9)
+    .channels(1)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(9)
+    .channels(2)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(9)
+    .channels(4)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(9)
+    .channels(1)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(9)
+    .channels(2)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(9)
+    .channels(4)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(QC8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL8_LD64, c_eq_8) {
     TEST_REQUIRES_ARM_NEON;
@@ -24572,468 +25034,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
-  DWConvMicrokernelTester()
-    .cr(1)
-    .kr(25)
-    .channels(1)
-    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(1)
-      .width(5)
-      .output_stride(7)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 2; channels < 16; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .input_offset(48)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 2; channels < 16; channels += 3) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(25)
-        .channels(channels)
-        .input_offset(48)
-        .zero_index(mz)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
-  DWConvMicrokernelTester()
-    .cr(2)
-    .kr(25)
-    .channels(2)
-    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
-  for (uint32_t channels = 1; channels < 2; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(2)
-      .width(5)
-      .output_stride(13)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .input_offset(80)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 4; channels < 32; channels += 6) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(25)
-        .channels(channels)
-        .input_offset(80)
-        .zero_index(mz)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
-  DWConvMicrokernelTester()
-    .cr(4)
-    .kr(25)
-    .channels(4)
-    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
-  for (uint32_t channels = 1; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(4)
-      .width(5)
-      .output_stride(23)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .input_offset(112)
-      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 8; channels < 64; channels += 12) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(25)
-        .channels(channels)
-        .input_offset(112)
-        .zero_index(mz)
-        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
 TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_eq_1) {
   DWConvMicrokernelTester()
     .cr(1)
@@ -25494,4 +25494,928 @@
         .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
     }
   }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(25)
+    .channels(1)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(25)
+    .channels(2)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(25)
+    .channels(4)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(25)
+    .channels(1)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(25)
+    .channels(2)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(25)
+    .channels(4)
+    .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
 }
\ No newline at end of file
diff --git a/test/qc8-dwconv-minmax-fp32.yaml b/test/qc8-dwconv-minmax-fp32.yaml
index b0f0bb2..15dc85a 100644
--- a/test/qc8-dwconv-minmax-fp32.yaml
+++ b/test/qc8-dwconv-minmax-fp32.yaml
@@ -131,18 +131,24 @@
   init: xnn_init_qs8_minmax_wasmsimd_params
 - name: xnn_qc8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16
   init: xnn_init_qs8_minmax_wasmsimd_params
-- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
 - name: xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic
   init: xnn_init_qs8_minmax_scalar_fmagic_params
 - name: xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic
   init: xnn_init_qs8_minmax_scalar_fmagic_params
 - name: xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic
   init: xnn_init_qs8_minmax_scalar_fmagic_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
 - name: xnn_qc8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul8_ld64
   init: xnn_init_qs8_minmax_neon_params
 - name: xnn_qc8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul8_ld64
@@ -271,15 +277,21 @@
   init: xnn_init_qs8_minmax_wasmsimd_params
 - name: xnn_qc8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16
   init: xnn_init_qs8_minmax_wasmsimd_params
-- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
 - name: xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic
   init: xnn_init_qs8_minmax_scalar_fmagic_params
 - name: xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic
   init: xnn_init_qs8_minmax_scalar_fmagic_params
 - name: xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic
   init: xnn_init_qs8_minmax_scalar_fmagic_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+- name: xnn_qc8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
diff --git a/test/qc8-gemm-minmax-fp32.cc b/test/qc8-gemm-minmax-fp32.cc
index 80b2a7d..880e442 100644
--- a/test/qc8-gemm-minmax-fp32.cc
+++ b/test/qc8-gemm-minmax-fp32.cc
@@ -77180,2654 +77180,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
 TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
   GemmMicrokernelTester()
     .mr(1)
@@ -82474,3 +79826,5299 @@
     .cm_stride(7)
     .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
 }
+
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
diff --git a/test/qc8-gemm-minmax-fp32.yaml b/test/qc8-gemm-minmax-fp32.yaml
index 23e095c..bfba07b 100644
--- a/test/qc8-gemm-minmax-fp32.yaml
+++ b/test/qc8-gemm-minmax-fp32.yaml
@@ -539,30 +539,6 @@
 - name: xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128
   init: xnn_init_qs8_minmax_wasmsimd_params
   k-block: 8
-- name: xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
 - name: xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic
   init: xnn_init_qs8_minmax_scalar_fmagic_params
   k-block: 1
@@ -587,3 +563,51 @@
 - name: xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic
   init: xnn_init_qs8_minmax_scalar_fmagic_params
   k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
diff --git a/test/qc8-igemm-minmax-fp32.cc b/test/qc8-igemm-minmax-fp32.cc
index de7517e..f26b2b8 100644
--- a/test/qc8-igemm-minmax-fp32.cc
+++ b/test/qc8-igemm-minmax-fp32.cc
@@ -75029,2990 +75029,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(7)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 1; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(7)
-        .zero_index(mz)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(13)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 2; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(13)
-        .zero_index(mz)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(17)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 3; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(17)
-        .zero_index(mz)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(23)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 4; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(23)
-        .zero_index(mz)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(7)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 1; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(7)
-        .zero_index(mz)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(13)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 2; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(13)
-        .zero_index(mz)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(17)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 3; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(17)
-        .zero_index(mz)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(23)
-      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 4; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(23)
-        .zero_index(mz)
-        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
 TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
   GemmMicrokernelTester()
     .mr(1)
@@ -80995,3 +78011,5971 @@
     .cm_stride(7)
     .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_minmax_scalar_fmagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
 }
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_minmax_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_minmax_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
diff --git a/test/qc8-igemm-minmax-fp32.yaml b/test/qc8-igemm-minmax-fp32.yaml
index 6c1861e..1c1611d 100644
--- a/test/qc8-igemm-minmax-fp32.yaml
+++ b/test/qc8-igemm-minmax-fp32.yaml
@@ -506,30 +506,6 @@
 - name: xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128
   init: xnn_init_qs8_minmax_wasmsimd_params
   k-block: 8
-- name: xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf
-  init: xnn_init_qs8_minmax_scalar_lrintf_params
-  k-block: 1
 - name: xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic
   init: xnn_init_qs8_minmax_scalar_fmagic_params
   k-block: 1
@@ -554,3 +530,51 @@
 - name: xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic
   init: xnn_init_qs8_minmax_scalar_fmagic_params
   k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic
+  init: xnn_init_qs8_minmax_scalar_imagic_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf
+  init: xnn_init_qs8_minmax_scalar_lrintf_params
+  k-block: 1
diff --git a/test/qs8-dwconv-minmax-fp32.cc b/test/qs8-dwconv-minmax-fp32.cc
index cc067d6..accc2a9 100644
--- a/test/qs8-dwconv-minmax-fp32.cc
+++ b/test/qs8-dwconv-minmax-fp32.cc
@@ -9602,468 +9602,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
-  DWConvMicrokernelTester()
-    .cr(1)
-    .kr(9)
-    .channels(1)
-    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(1)
-      .width(5)
-      .output_stride(7)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 2; channels < 16; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .input_offset(48)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 2; channels < 16; channels += 3) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(9)
-        .channels(channels)
-        .input_offset(48)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
-  DWConvMicrokernelTester()
-    .cr(2)
-    .kr(9)
-    .channels(2)
-    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
-  for (uint32_t channels = 1; channels < 2; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(2)
-      .width(5)
-      .output_stride(13)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .input_offset(80)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 4; channels < 32; channels += 6) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(9)
-        .channels(channels)
-        .input_offset(80)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
-  DWConvMicrokernelTester()
-    .cr(4)
-    .kr(9)
-    .channels(4)
-    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
-  for (uint32_t channels = 1; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(4)
-      .width(5)
-      .output_stride(23)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .input_offset(112)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 8; channels < 64; channels += 12) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(9)
-        .channels(channels)
-        .input_offset(112)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
 TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_eq_1) {
   DWConvMicrokernelTester()
     .cr(1)
@@ -10526,6 +10064,930 @@
   }
 }
 
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(9)
+    .channels(1)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(9)
+    .channels(2)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(9)
+    .channels(4)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(9)
+    .channels(1)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(9)
+    .channels(2)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(9)
+    .channels(4)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_eq_8) {
     TEST_REQUIRES_ARM_NEON;
@@ -20108,468 +20570,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
-  DWConvMicrokernelTester()
-    .cr(1)
-    .kr(25)
-    .channels(1)
-    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(1)
-      .width(5)
-      .output_stride(7)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 2; channels < 16; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .input_offset(48)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 2; channels < 16; channels += 3) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(25)
-        .channels(channels)
-        .input_offset(48)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
-  DWConvMicrokernelTester()
-    .cr(2)
-    .kr(25)
-    .channels(2)
-    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
-  for (uint32_t channels = 1; channels < 2; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(2)
-      .width(5)
-      .output_stride(13)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .input_offset(80)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 4; channels < 32; channels += 6) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(25)
-        .channels(channels)
-        .input_offset(80)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
-  DWConvMicrokernelTester()
-    .cr(4)
-    .kr(25)
-    .channels(4)
-    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
-  for (uint32_t channels = 1; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(4)
-      .width(5)
-      .output_stride(23)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .input_offset(112)
-      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 8; channels < 64; channels += 12) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(25)
-        .channels(channels)
-        .input_offset(112)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
 TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_eq_1) {
   DWConvMicrokernelTester()
     .cr(1)
@@ -21030,4 +21030,928 @@
         .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
     }
   }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(25)
+    .channels(1)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(25)
+    .channels(2)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(25)
+    .channels(4)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(25)
+    .channels(1)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(25)
+    .channels(2)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(25)
+    .channels(4)
+    .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
 }
\ No newline at end of file
diff --git a/test/qs8-dwconv-minmax-fp32.yaml b/test/qs8-dwconv-minmax-fp32.yaml
index fea6679..1f02558 100644
--- a/test/qs8-dwconv-minmax-fp32.yaml
+++ b/test/qs8-dwconv-minmax-fp32.yaml
@@ -106,18 +106,24 @@
   init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params
 - name: xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16_add16
   init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params
-- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
 - name: xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic
   init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params
 - name: xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic
   init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params
 - name: xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic
   init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
 - name: xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16
   init: xnn_init_qs8_conv_minmax_fp32_neon_params
 - name: xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16
@@ -222,15 +228,21 @@
   init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params
 - name: xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16_add16
   init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params
-- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
 - name: xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic
   init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params
 - name: xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic
   init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params
 - name: xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic
   init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
diff --git a/test/qs8-gemm-minmax-fp32.cc b/test/qs8-gemm-minmax-fp32.cc
index 34b0430..e6f64f5 100644
--- a/test/qs8-gemm-minmax-fp32.cc
+++ b/test/qs8-gemm-minmax-fp32.cc
@@ -94107,2654 +94107,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
 TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
   GemmMicrokernelTester()
     .mr(1)
@@ -99401,3 +96753,5299 @@
     .cm_stride(7)
     .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
 }
+
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
diff --git a/test/qs8-gemm-minmax-fp32.yaml b/test/qs8-gemm-minmax-fp32.yaml
index 96a0baa..20c3c34 100644
--- a/test/qs8-gemm-minmax-fp32.yaml
+++ b/test/qs8-gemm-minmax-fp32.yaml
@@ -650,30 +650,6 @@
 - name: xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16
   init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
 - name: xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic
   init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params
   k-block: 1
@@ -698,3 +674,51 @@
 - name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic
   init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params
   k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
diff --git a/test/qs8-igemm-minmax-fp32.cc b/test/qs8-igemm-minmax-fp32.cc
index 69350b7..3c21f5b 100644
--- a/test/qs8-igemm-minmax-fp32.cc
+++ b/test/qs8-igemm-minmax-fp32.cc
@@ -74093,2990 +74093,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(7)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 1; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(7)
-        .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(13)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 2; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(13)
-        .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(17)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 3; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(17)
-        .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(23)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 4; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(23)
-        .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(7)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 1; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(7)
-        .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(13)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 2; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(13)
-        .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(17)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 3; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(17)
-        .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(23)
-      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 4; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(23)
-        .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
-}
-
-
 TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
   GemmMicrokernelTester()
     .mr(1)
@@ -80059,3 +77075,5971 @@
     .cm_stride(7)
     .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
 }
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
+
+TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+}
diff --git a/test/qs8-igemm-minmax-fp32.yaml b/test/qs8-igemm-minmax-fp32.yaml
index d81de8c..295c945 100644
--- a/test/qs8-igemm-minmax-fp32.yaml
+++ b/test/qs8-igemm-minmax-fp32.yaml
@@ -499,30 +499,6 @@
 - name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128
   init: xnn_init_qs8_conv_minmax_fp32_wasmsimd_params
   k-block: 8
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf
-  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
 - name: xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic
   init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params
   k-block: 1
@@ -547,3 +523,51 @@
 - name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic
   init: xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params
   k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf
+  init: xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
diff --git a/test/qu8-dwconv-minmax-fp32.cc b/test/qu8-dwconv-minmax-fp32.cc
index 71b8ab4..ec3afde 100644
--- a/test/qu8-dwconv-minmax-fp32.cc
+++ b/test/qu8-dwconv-minmax-fp32.cc
@@ -5961,546 +5961,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
-  DWConvMicrokernelTester()
-    .cr(1)
-    .kr(9)
-    .channels(1)
-    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(1)
-      .width(5)
-      .output_stride(7)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_zero_point_only) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(255)
-      .kernel_zero_point(0)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, kernel_zero_point_only) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(0)
-      .kernel_zero_point(255)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 2; channels < 16; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .input_offset(48)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 2; channels < 16; channels += 3) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(9)
-        .channels(channels)
-        .input_offset(48)
-        .zero_index(mz)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
-  DWConvMicrokernelTester()
-    .cr(2)
-    .kr(9)
-    .channels(2)
-    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
-  for (uint32_t channels = 1; channels < 2; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(2)
-      .width(5)
-      .output_stride(13)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_zero_point_only) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(255)
-      .kernel_zero_point(0)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, kernel_zero_point_only) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(0)
-      .kernel_zero_point(255)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .input_offset(80)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 4; channels < 32; channels += 6) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(9)
-        .channels(channels)
-        .input_offset(80)
-        .zero_index(mz)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
-  DWConvMicrokernelTester()
-    .cr(4)
-    .kr(9)
-    .channels(4)
-    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
-  for (uint32_t channels = 1; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(4)
-      .width(5)
-      .output_stride(23)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_zero_point_only) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(255)
-      .kernel_zero_point(0)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, kernel_zero_point_only) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(0)
-      .kernel_zero_point(255)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .input_offset(112)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 8; channels < 64; channels += 12) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(9)
-        .channels(channels)
-        .input_offset(112)
-        .zero_index(mz)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
 TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_eq_1) {
   DWConvMicrokernelTester()
     .cr(1)
@@ -7041,6 +6501,1086 @@
   }
 }
 
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(9)
+    .channels(1)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(9)
+    .channels(2)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(9)
+    .channels(4)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(9)
+    .channels(1)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(9)
+    .channels(2)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(9)
+    .channels(4)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
   TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_eq_8) {
     TEST_REQUIRES_ARM_NEON;
@@ -12982,546 +13522,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
-  DWConvMicrokernelTester()
-    .cr(1)
-    .kr(25)
-    .channels(1)
-    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(1)
-      .width(5)
-      .output_stride(7)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_zero_point_only) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(255)
-      .kernel_zero_point(0)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, kernel_zero_point_only) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(0)
-      .kernel_zero_point(255)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 2; channels < 16; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .input_offset(48)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 2; channels < 16; channels += 3) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(25)
-        .channels(channels)
-        .input_offset(48)
-        .zero_index(mz)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
-  DWConvMicrokernelTester()
-    .cr(2)
-    .kr(25)
-    .channels(2)
-    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
-  for (uint32_t channels = 1; channels < 2; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(2)
-      .width(5)
-      .output_stride(13)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_zero_point_only) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(255)
-      .kernel_zero_point(0)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, kernel_zero_point_only) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(0)
-      .kernel_zero_point(255)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .input_offset(80)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 4; channels < 32; channels += 6) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(25)
-        .channels(channels)
-        .input_offset(80)
-        .zero_index(mz)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
-  DWConvMicrokernelTester()
-    .cr(4)
-    .kr(25)
-    .channels(4)
-    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
-  for (uint32_t channels = 1; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(4)
-      .width(5)
-      .output_stride(23)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_zero_point_only) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(255)
-      .kernel_zero_point(0)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, kernel_zero_point_only) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .input_zero_point(0)
-      .kernel_zero_point(255)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .input_offset(112)
-      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 8; channels < 64; channels += 12) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(25)
-        .channels(channels)
-        .input_offset(112)
-        .zero_index(mz)
-        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
 TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_eq_1) {
   DWConvMicrokernelTester()
     .cr(1)
@@ -14060,4 +14060,1084 @@
         .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
     }
   }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(25)
+    .channels(1)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(25)
+    .channels(2)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(25)
+    .channels(4)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(25)
+    .channels(1)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(25)
+    .channels(2)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(25)
+    .channels(4)
+    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_zero_point_only) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(255)
+      .kernel_zero_point(0)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, kernel_zero_point_only) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .input_zero_point(0)
+      .kernel_zero_point(255)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
 }
\ No newline at end of file
diff --git a/test/qu8-dwconv-minmax-fp32.yaml b/test/qu8-dwconv-minmax-fp32.yaml
index 9ac55fd..fa1b330 100644
--- a/test/qu8-dwconv-minmax-fp32.yaml
+++ b/test/qu8-dwconv-minmax-fp32.yaml
@@ -58,18 +58,24 @@
   init: xnn_init_qu8_conv_minmax_fp32_wasmsimd_params
 - name: xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16
   init: xnn_init_qu8_conv_minmax_fp32_wasmsimd_params
-- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
 - name: xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic
   init: xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params
 - name: xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic
   init: xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params
 - name: xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic
   init: xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
 - name: xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16
   init: xnn_init_qu8_conv_minmax_fp32_neon_params
 - name: xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16
@@ -126,15 +132,21 @@
   init: xnn_init_qu8_conv_minmax_fp32_wasmsimd_params
 - name: xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16
   init: xnn_init_qu8_conv_minmax_fp32_wasmsimd_params
-- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
 - name: xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic
   init: xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params
 - name: xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic
   init: xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params
 - name: xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic
   init: xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+- name: xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
diff --git a/test/qu8-gemm-minmax-fp32.cc b/test/qu8-gemm-minmax-fp32.cc
index 37c5a2f..b833f56 100644
--- a/test/qu8-gemm-minmax-fp32.cc
+++ b/test/qu8-gemm-minmax-fp32.cc
@@ -47843,3014 +47843,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_a) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_a) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_strided_a) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .a_stride(3)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_strided_a) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .a_stride(11)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_a) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_a) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .a_stride(7)
-        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
 TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
   GemmMicrokernelTester()
     .mr(1)
@@ -53857,4 +50849,6020 @@
       .b_zero_point(0)
       .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
   }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_a) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_a) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_strided_a) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .a_stride(3)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_strided_a) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_stride(11)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_a) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_a) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .a_stride(7)
+        .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_GEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
 }
\ No newline at end of file
diff --git a/test/qu8-gemm-minmax-fp32.yaml b/test/qu8-gemm-minmax-fp32.yaml
index 7e52b9a..62bfe78 100644
--- a/test/qu8-gemm-minmax-fp32.yaml
+++ b/test/qu8-gemm-minmax-fp32.yaml
@@ -292,30 +292,6 @@
 - name: xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128
   init: xnn_init_qu8_conv_minmax_fp32_wasmsimd_params
   k-block: 8
-- name: xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
 - name: xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic
   init: xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params
   k-block: 1
@@ -340,3 +316,51 @@
 - name: xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic
   init: xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params
   k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
diff --git a/test/qu8-igemm-minmax-fp32.cc b/test/qu8-igemm-minmax-fp32.cc
index 3b5506c..de8824e 100644
--- a/test/qu8-igemm-minmax-fp32.cc
+++ b/test/qu8-igemm-minmax-fp32.cc
@@ -48995,3350 +48995,6 @@
 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
 
 
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(7)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 1; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(7)
-        .zero_index(mz)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(13)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 2; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(13)
-        .zero_index(mz)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(17)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 3; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(17)
-        .zero_index(mz)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cn_stride(5)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 2; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(2)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 2; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(5)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
-  for (uint32_t n = 3; n < 4; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
-  for (uint32_t n = 4; n <= 6; n += 2) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 2; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(2)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(5)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .ks(3)
-      .a_offset(23)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 4; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(2)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(2)
-        .k(k)
-        .ks(3)
-        .a_offset(23)
-        .zero_index(mz)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(2)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(2)
-    .k(1)
-    .cm_stride(5)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(2)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(2)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 1; m++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 1; m++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 1; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(1)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(7)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 1; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(1)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(1)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(7)
-        .zero_index(mz)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(1)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(1)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(1)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(1)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 2; m++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 2; m++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 2; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(2)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(13)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 2; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(2)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(2)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(13)
-        .zero_index(mz)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(2)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(2)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(2)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(2)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 3; m++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 3; m++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 3; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(3)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(17)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 3; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(3)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(3)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(17)
-        .zero_index(mz)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(3)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(3)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(3)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(3)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cn_stride(7)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    for (uint32_t n = 1; n <= 4; n++) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(m)
-        .n(n)
-        .k(1)
-        .iterations(1)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
-  for (uint32_t m = 1; m <= 4; m++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(m)
-      .n(4)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
-  for (uint32_t n = 1; n <= 4; n++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(n)
-      .k(1)
-      .iterations(1)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
-  for (size_t k = 2; k < 10; k++) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
-  for (size_t k = 2; k < 10; k++) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(n)
-        .k(k)
-        .cn_stride(7)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      for (uint32_t m = 1; m <= 4; m++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .ks(3)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
-  for (uint32_t n = 5; n < 8; n++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
-  for (uint32_t n = 8; n <= 12; n += 4) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    for (uint32_t m = 1; m <= 4; m++) {
-      for (uint32_t n = 1; n <= 4; n++) {
-        GemmMicrokernelTester()
-          .mr(4)
-          .nr(4)
-          .kr(1)
-          .sr(1)
-          .m(m)
-          .n(n)
-          .k(k)
-          .cm_stride(7)
-          .iterations(1)
-          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-      }
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .ks(3)
-      .a_offset(23)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
-  for (uint32_t mz = 0; mz < 4; mz++) {
-    for (size_t k = 1; k <= 5; k += 2) {
-      GemmMicrokernelTester()
-        .mr(4)
-        .nr(4)
-        .kr(1)
-        .sr(1)
-        .m(4)
-        .n(4)
-        .k(k)
-        .ks(3)
-        .a_offset(23)
-        .zero_index(mz)
-        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-    }
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmin(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .qmax(128)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
-  GemmMicrokernelTester()
-    .mr(4)
-    .nr(4)
-    .kr(1)
-    .sr(1)
-    .m(4)
-    .n(4)
-    .k(1)
-    .cm_stride(7)
-    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_a_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_b_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
-TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_zero_point) {
-  for (size_t k = 1; k <= 5; k += 2) {
-    GemmMicrokernelTester()
-      .mr(4)
-      .nr(4)
-      .kr(1)
-      .sr(1)
-      .m(4)
-      .n(4)
-      .k(k)
-      .a_zero_point(0)
-      .b_zero_point(0)
-      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
-  }
-}
-
 TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
   GemmMicrokernelTester()
     .mr(1)
@@ -55681,4 +52337,6692 @@
       .b_zero_point(0)
       .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
   }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cn_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 2; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(2)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 2; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(5)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
+  for (uint32_t n = 3; n < 4; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
+  for (uint32_t n = 4; n <= 6; n += 2) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 2; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(2)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(5)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(2)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(2)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(2)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(2)
+    .k(1)
+    .cm_stride(5)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(2)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(2)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 1; m++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(7)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 1; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(1)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(7)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(1)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(1)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(1)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 2; m++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(13)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 2; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(2)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(13)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(2)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(2)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(2)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 3; m++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(17)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 3; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(3)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(17)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(3)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(3)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(3)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cn_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    for (uint32_t n = 1; n <= 4; n++) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(m)
+        .n(n)
+        .k(1)
+        .iterations(1)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
+  for (uint32_t m = 1; m <= 4; m++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(m)
+      .n(4)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
+  for (uint32_t n = 1; n <= 4; n++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(n)
+      .k(1)
+      .iterations(1)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
+  for (size_t k = 2; k < 10; k++) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
+  for (size_t k = 2; k < 10; k++) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(n)
+        .k(k)
+        .cn_stride(7)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      for (uint32_t m = 1; m <= 4; m++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .ks(3)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
+  for (uint32_t n = 5; n < 8; n++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
+  for (uint32_t n = 8; n <= 12; n += 4) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    for (uint32_t m = 1; m <= 4; m++) {
+      for (uint32_t n = 1; n <= 4; n++) {
+        GemmMicrokernelTester()
+          .mr(4)
+          .nr(4)
+          .kr(1)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(k)
+          .cm_stride(7)
+          .iterations(1)
+          .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+      }
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .ks(3)
+      .a_offset(23)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
+  for (uint32_t mz = 0; mz < 4; mz++) {
+    for (size_t k = 1; k <= 5; k += 2) {
+      GemmMicrokernelTester()
+        .mr(4)
+        .nr(4)
+        .kr(1)
+        .sr(1)
+        .m(4)
+        .n(4)
+        .k(k)
+        .ks(3)
+        .a_offset(23)
+        .zero_index(mz)
+        .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+    }
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmin(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .qmax(128)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
+  GemmMicrokernelTester()
+    .mr(4)
+    .nr(4)
+    .kr(1)
+    .sr(1)
+    .m(4)
+    .n(4)
+    .k(1)
+    .cm_stride(7)
+    .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_a_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_b_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
+}
+
+TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, no_zero_point) {
+  for (size_t k = 1; k <= 5; k += 2) {
+    GemmMicrokernelTester()
+      .mr(4)
+      .nr(4)
+      .kr(1)
+      .sr(1)
+      .m(4)
+      .n(4)
+      .k(k)
+      .a_zero_point(0)
+      .b_zero_point(0)
+      .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
+  }
 }
\ No newline at end of file
diff --git a/test/qu8-igemm-minmax-fp32.yaml b/test/qu8-igemm-minmax-fp32.yaml
index e38e345..9395b71 100644
--- a/test/qu8-igemm-minmax-fp32.yaml
+++ b/test/qu8-igemm-minmax-fp32.yaml
@@ -292,30 +292,6 @@
 - name: xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128
   init: xnn_init_qu8_conv_minmax_fp32_wasmsimd_params
   k-block: 8
-- name: xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
-- name: xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf
-  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
-  k-block: 1
 - name: xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic
   init: xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params
   k-block: 1
@@ -340,3 +316,51 @@
 - name: xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic
   init: xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params
   k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1
+- name: xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf
+  init: xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params
+  k-block: 1