QS8 NEON GEMM microkernels with 8-bit multiply (mull_addw_dup: 2x8, 4x8, 2x16, 4x16)
PiperOrigin-RevId: 351893800
diff --git a/bench/qs8-gemm.cc b/bench/qs8-gemm.cc
index a231fce..63e09f3 100644
--- a/bench/qs8-gemm.cc
+++ b/bench/qs8-gemm.cc
@@ -296,6 +296,18 @@
static void qs8_gemm_4x16__neon_mlal_lane(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, 4, 16, 1, 1, benchmark::utils::CheckNEON);
}
+ static void qs8_gemm_2x8__neon_mull_addw_dup(benchmark::State& state, const char* net) {
+ GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, 2, 8, 1, 1, benchmark::utils::CheckNEON);
+ }
+ static void qs8_gemm_4x8__neon_mull_addw_dup(benchmark::State& state, const char* net) {
+ GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, 4, 8, 1, 1, benchmark::utils::CheckNEON);
+ }
+ static void qs8_gemm_2x16__neon_mull_addw_dup(benchmark::State& state, const char* net) {
+ GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, 2, 16, 1, 1, benchmark::utils::CheckNEON);
+ }
+ static void qs8_gemm_4x16__neon_mull_addw_dup(benchmark::State& state, const char* net) {
+ GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, 4, 16, 1, 1, benchmark::utils::CheckNEON);
+ }
static void qs8_gemm_1x8c4__neondot(benchmark::State& state, const char* net) {
GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, 1, 8, 4, 1, benchmark::utils::CheckNEONDOT);
}
@@ -328,6 +340,10 @@
BENCHMARK_GEMM(qs8_gemm_4x8__neon_mlal_lane)
BENCHMARK_GEMM(qs8_gemm_2x16__neon_mlal_lane)
BENCHMARK_GEMM(qs8_gemm_4x16__neon_mlal_lane)
+ BENCHMARK_GEMM(qs8_gemm_2x8__neon_mull_addw_dup)
+ BENCHMARK_GEMM(qs8_gemm_4x8__neon_mull_addw_dup)
+ BENCHMARK_GEMM(qs8_gemm_2x16__neon_mull_addw_dup)
+ BENCHMARK_GEMM(qs8_gemm_4x16__neon_mull_addw_dup)
BENCHMARK_GEMM(qs8_gemm_1x8c4__neondot)
BENCHMARK_GEMM(qs8_gemm_4x8c4__neondot)
BENCHMARK_GEMM(qs8_gemm_6x8c4__neondot)