Add all WebAssembly microkernels to the QS8 end-to-end benchmark.

PiperOrigin-RevId: 356675404
diff --git a/bench/qs8-gemm-e2e.cc b/bench/qs8-gemm-e2e.cc
index f02adb5..c7e123b 100644
--- a/bench/qs8-gemm-e2e.cc
+++ b/bench/qs8-gemm-e2e.cc
@@ -276,6 +276,69 @@
   BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_12x8c4__neondot);
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
+#if XNN_ARCH_WASMSIMD
+  // Runs GEMMEnd2EndBenchmark on `model` with the 1x4c8 ld64 GEMM/IGEMM pair (the same pair is passed twice).
+  static void qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+    GEMMEnd2EndBenchmark(
+      state, model,
+      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
+      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
+      1 /* mr */, 4 /* nr */, 3 /* log2_kr */);
+  }
+
+  // Runs GEMMEnd2EndBenchmark on `model` with the 1x4c8 ld128 GEMM/IGEMM pair (the same pair is passed twice).
+  static void qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+    GEMMEnd2EndBenchmark(
+      state, model,
+      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
+      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
+      1 /* mr */, 4 /* nr */, 3 /* log2_kr */);
+  }
+
+  // Runs GEMMEnd2EndBenchmark on `model` with the 2x4c8 ld64 GEMM/IGEMM pair, followed by the 1x4c8 ld64 pair.
+  static void qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+    GEMMEnd2EndBenchmark(
+      state, model,
+      xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64,
+      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
+      2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
+  }
+
+  // Runs GEMMEnd2EndBenchmark on `model` with the 2x4c8 ld128 GEMM/IGEMM pair, followed by the 1x4c8 ld128 pair.
+  static void qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+    GEMMEnd2EndBenchmark(
+      state, model,
+      xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128,
+      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
+      2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
+  }
+
+  // Runs GEMMEnd2EndBenchmark on `model` with the 3x4c8 ld64 GEMM/IGEMM pair, followed by the 1x4c8 ld64 pair.
+  static void qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+    GEMMEnd2EndBenchmark(
+      state, model,
+      xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64,
+      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
+      3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
+  }
+
+  // Runs GEMMEnd2EndBenchmark on `model` with the 3x4c8 ld128 GEMM/IGEMM pair, followed by the 1x4c8 ld128 pair.
+  static void qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+    GEMMEnd2EndBenchmark(
+      state, model,
+      xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128,
+      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
+      3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
+  }
+
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64);  // one entry per WAsm SIMD wrapper above
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128);
+#endif  // XNN_ARCH_WASMSIMD
+
 #if SCALAR_IGEMM
 static void qs8_gemm_minmax_ukernel_8x8c4__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
   GEMMEnd2EndBenchmark(state, model,