FP16 4x8, 6x8 and 1x8 GEMM ld64 microkernels
PiperOrigin-RevId: 306697529
diff --git a/bench/f16-gemm.cc b/bench/f16-gemm.cc
index 9585f22..74a8564 100644
--- a/bench/f16-gemm.cc
+++ b/bench/f16-gemm.cc
@@ -136,11 +136,26 @@
GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, 6, 16, 1);
}
+ static void f16_gemm_1x8__aarch64_neonfp16arith_ld64(benchmark::State& state, const char* net) {  // Benchmark entry for the FP16 1x8 ld64 GEMM microkernel; `net` is not referenced in this body.
+ GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, 1, 8, 1);  // 1, 8 mirror the 1x8 tile in the kernel name; the trailing 1 is presumably kr -- TODO confirm against GEMMBenchmark's signature.
+ }
+
+ static void f16_gemm_4x8__aarch64_neonfp16arith_ld64(benchmark::State& state, const char* net) {  // Benchmark entry for the FP16 4x8 ld64 GEMM microkernel; `net` is not referenced in this body.
+ GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, 4, 8, 1);  // 4, 8 mirror the 4x8 tile in the kernel name; the trailing 1 is presumably kr -- TODO confirm against GEMMBenchmark's signature.
+ }
+
+ static void f16_gemm_6x8__aarch64_neonfp16arith_ld64(benchmark::State& state, const char* net) {  // Benchmark entry for the FP16 6x8 ld64 GEMM microkernel; `net` is not referenced in this body.
+ GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, 6, 8, 1);  // 6, 8 mirror the 6x8 tile in the kernel name; the trailing 1 is presumably kr -- TODO confirm against GEMMBenchmark's signature.
+ }
+
BENCHMARK_GEMM(f16_gemm_1x16__aarch64_neonfp16arith_ld32)
BENCHMARK_GEMM(f16_gemm_4x16__aarch64_neonfp16arith_ld32)
BENCHMARK_GEMM(f16_gemm_6x16__aarch64_neonfp16arith_ld32)
-
+ BENCHMARK_GEMM(f16_gemm_1x8__aarch64_neonfp16arith_ld64)  // ld64 8-column variants, listed after the ld32 16-column ones. NOTE(review): BENCHMARK_GEMM is defined outside this hunk; presumably it registers the function with the benchmark runner -- confirm.
+ BENCHMARK_GEMM(f16_gemm_4x8__aarch64_neonfp16arith_ld64)
+ BENCHMARK_GEMM(f16_gemm_6x8__aarch64_neonfp16arith_ld64)
#endif // XNN_ARCH_ARM64
+
#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif