Add QS8 GEMM MRx4c8 SSE2/SSSE3/SSE4.1 microkernels

PiperOrigin-RevId: 324300862
diff --git a/bench/qs8-gemm.cc b/bench/qs8-gemm.cc
index 21a21f6..a6ef819 100644
--- a/bench/qs8-gemm.cc
+++ b/bench/qs8-gemm.cc
@@ -105,18 +105,30 @@
   static void qs8_gemm_4x4c2__sse2(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2, 4, 4, 2, 1);
   }
-
   static void qs8_gemm_4x4c2__ssse3(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3, 4, 4, 2, 1);
   }
-
   static void qs8_gemm_4x4c2__sse41(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41, 4, 4, 2, 1);
   }
 
+  static void qs8_gemm_2x4c8__sse2(benchmark::State& state, const char* net) {
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2, 2, 4, 8, 1);
+  }
+  static void qs8_gemm_2x4c8__ssse3(benchmark::State& state, const char* net) {
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3, 2, 4, 8, 1);
+  }
+  static void qs8_gemm_2x4c8__sse41(benchmark::State& state, const char* net) {
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41, 2, 4, 8, 1);
+  }
+
   BENCHMARK_GEMM(qs8_gemm_4x4c2__sse2)
   BENCHMARK_GEMM(qs8_gemm_4x4c2__ssse3)
   BENCHMARK_GEMM(qs8_gemm_4x4c2__sse41)
+
+  BENCHMARK_GEMM(qs8_gemm_2x4c8__sse2)
+  BENCHMARK_GEMM(qs8_gemm_2x4c8__ssse3)
+  BENCHMARK_GEMM(qs8_gemm_2x4c8__sse41)
 #endif
 
 #ifndef XNNPACK_BENCHMARK_NO_MAIN