Add 4x8 Cortex-A53 GEMM and GEMMINC unpipelined microkernels.
PiperOrigin-RevId: 276743130
diff --git a/bench/f32-gemm.cc b/bench/f32-gemm.cc
index 15eacbb..f2a3973 100644
--- a/bench/f32-gemm.cc
+++ b/bench/f32-gemm.cc
@@ -351,6 +351,9 @@
static void sgemm_4x12__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_ukernel_4x12__aarch64_neonfma_cortex_a53, 4, 12, 1, 1);
}
+ static void sgemm_4x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
+ GEMMBenchmark(state, xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a53, 4, 8, 1, 1);
+ }
static void sgemm_4x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57, 4, 8, 1, 1);
}
@@ -390,6 +393,7 @@
BENCHMARK_GEMM(sgemm_1x8__aarch64_neonfma_cortex_a57)
BENCHMARK_GEMM(sgemm_1x8__aarch64_neonfma_cortex_a75)
BENCHMARK_GEMM(sgemm_4x12__aarch64_neonfma_cortex_a53)
+ BENCHMARK_GEMM(sgemm_4x8__aarch64_neonfma_cortex_a53)
BENCHMARK_GEMM(sgemm_4x8__aarch64_neonfma_cortex_a57)
BENCHMARK_GEMM(sgemm_4x8__aarch64_neonfma_cortex_a75)
BENCHMARK_GEMM(sgemm_4x8__aarch64_neonfma_ld128)