Use the 6x8 micro-kernel on Cortex-A76
On the big core:
  A75 kernel is 0.3% faster with 6x8 than with 4x8
  A53 kernel is 11.2% faster with 6x8 than with 4x8
On the little core:
  A75 kernel is 6.4% faster with 6x8 than with 4x8
  A53 kernel is 9.2% faster with 6x8 than with 4x8
PiperOrigin-RevId: 277156128
diff --git a/src/init.c b/src/init.c
index 0fbc33b..8f6975c 100644
--- a/src/init.c
+++ b/src/init.c
@@ -296,7 +296,6 @@
};
break;
case cpuinfo_uarch_cortex_a72:
- case cpuinfo_uarch_cortex_a76:
xnn_params.f32.gemm = (struct gemm_parameters) {
.gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
.igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__aarch64_neonfma_cortex_a75,
@@ -307,6 +306,7 @@
};
break;
case cpuinfo_uarch_cortex_a75:
+ case cpuinfo_uarch_cortex_a76:
case cpuinfo_uarch_mongoose_m1:
case cpuinfo_uarch_mongoose_m2:
case cpuinfo_uarch_meerkat_m3: