QS8 1x16c4 ld32 GEMM microkernel using NEON dot product
PiperOrigin-RevId: 360285164
diff --git a/bench/qs8-gemm.cc b/bench/qs8-gemm.cc
index 09779ad..9e740d2 100644
--- a/bench/qs8-gemm.cc
+++ b/bench/qs8-gemm.cc
@@ -516,22 +516,26 @@
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
#if XNN_ARCH_ARM64
+ static void qs8_gemm_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, const char* net) {
+ GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, 4, 16, 4, 1, benchmark::utils::CheckNEONDOT);
+ }
+ static void qs8_gemm_1x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) {
+ GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, 1, 16, 4, 1, benchmark::utils::CheckNEONDOT);
+ }
static void qs8_gemm_1x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, 1, 16, 4, 1, benchmark::utils::CheckNEONDOT);
}
- static void qs8_gemm_4x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) {
- GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, 4, 16, 4, 1, benchmark::utils::CheckNEONDOT);
- }
static void qs8_gemm_4x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, 4, 16, 4, 1, benchmark::utils::CheckNEONDOT);
}
- static void qs8_gemm_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, const char* net) {
- GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, 4, 16, 4, 1, benchmark::utils::CheckNEONDOT);
+ static void qs8_gemm_4x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) {
+ GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, 4, 16, 4, 1, benchmark::utils::CheckNEONDOT);
}
+ BENCHMARK_GEMM(qs8_gemm_1x16c4__aarch64_neondot_ld32)
BENCHMARK_GEMM(qs8_gemm_1x16c4__aarch64_neondot_ld64)
- BENCHMARK_GEMM(qs8_gemm_4x16c4__aarch64_neondot_ld64)
BENCHMARK_GEMM(qs8_gemm_4x16c4__aarch64_neondot_ld32)
+ BENCHMARK_GEMM(qs8_gemm_4x16c4__aarch64_neondot_ld64)
BENCHMARK_GEMM(qs8_gemm_4x16c4__aarch64_neondot_cortex_a55)
#endif // XNN_ARCH_ARM64