Generate A57 micro-kernels from A75 source.
Remove the dedicated A57 source generators; the A57 micro-kernels are now generated from the A75 source with prefetching removed.
Add missing 4x8 and 5x8 variations.
PiperOrigin-RevId: 285897229
diff --git a/bench/f32-gemm.cc b/bench/f32-gemm.cc
index 18cdb81..642bddf 100644
--- a/bench/f32-gemm.cc
+++ b/bench/f32-gemm.cc
@@ -378,6 +378,9 @@
static void f32_gemm_4x8__aarch64_neonfma_ld128(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_ld128, 4, 8, 1, 1);
}
+ static void f32_gemm_5x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) {
+ GEMMBenchmark(state, xnn_f32_gemm_ukernel_5x8__aarch64_neonfma_cortex_a57, 5, 8, 1, 1);
+ }
static void f32_gemm_5x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_ukernel_5x8__aarch64_neonfma_cortex_a75, 5, 8, 1, 1);
}
@@ -390,12 +393,12 @@
static void f32_gemm_6x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53, 6, 8, 1, 1);
}
- static void f32_gemm_6x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) {
- GEMMBenchmark(state, xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57, 6, 8, 1, 1);
- }
static void f32_gemm_6x8__aarch64_neonfma_cortex_a73(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73, 6, 8, 1, 1);
}
+ static void f32_gemm_6x8__aarch64_neonfma_cortex_a57(benchmark::State& state, const char* net) {
+ GEMMBenchmark(state, xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57, 6, 8, 1, 1);
+ }
static void f32_gemm_6x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75, 6, 8, 1, 1);
}
@@ -427,10 +430,11 @@
BENCHMARK_GEMM(f32_gemm_4x8__aarch64_neonfma_cortex_a75)
BENCHMARK_GEMM(f32_gemm_4x8__aarch64_neonfma_ld128)
BENCHMARK_GEMM(f32_gemm_4x8__aarch64_neonfma_ld64)
+ BENCHMARK_GEMM(f32_gemm_5x8__aarch64_neonfma_cortex_a57)
BENCHMARK_GEMM(f32_gemm_5x8__aarch64_neonfma_cortex_a75)
BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_cortex_a53)
- BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_cortex_a57)
BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_cortex_a73)
+ BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_cortex_a57)
BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_cortex_a75)
BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_ld64)
BENCHMARK_GEMM(f32_gemm_6x8__aarch64_neonfma_ld128)