Enable the PLD (prefetching) variant of the Cortex-A75 f32 GEMM microkernel for AArch32
on Cortex-A57, Cortex-A72, Cortex-A73, Exynos M1 and Exynos M2.
Cortex-A57 (Nexus 6P):
Was
MobileNetV1/T:1/real_time 164837 us 163904 us 8 Freq=1.9584G
MobileNetV2/T:1/real_time 97915 us 97336 us 13 Freq=1.9584G
MobileNetV3Large/T:1/real_time 76266 us 75770 us 18 Freq=1.9584G
MobileNetV3Small/T:1/real_time 24066 us 23902 us 59 Freq=1.9584G
Now
MobileNetV1/T:1/real_time 133361 us 132568 us 9 Freq=1.9584G
MobileNetV2/T:1/real_time 85308 us 84727 us 14 Freq=1.9584G
MobileNetV3Large/T:1/real_time 71922 us 71471 us 20 Freq=1.9584G
MobileNetV3Small/T:1/real_time 23726 us 23569 us 59 Freq=1.9584G
Cortex-A72:
Was
MobileNetV1/T:1/real_time 150548 us 150546 us 11 Freq=2.516G
MobileNetV2/T:1/real_time 66435 us 66430 us 21 Freq=2.516G
MobileNetV3Large/T:1/real_time 51950 us 51949 us 27 Freq=2.516G
MobileNetV3Small/T:1/real_time 17160 us 17159 us 82 Freq=2.516G
Now
MobileNetV1/T:1/real_time 113302 us 113293 us 13 Freq=2.516G
MobileNetV2/T:1/real_time 64918 us 64914 us 22 Freq=2.516G
MobileNetV3Large/T:1/real_time 51061 us 51024 us 28 Freq=2.516G
MobileNetV3Small/T:1/real_time 17124 us 17122 us 83 Freq=2.516G
Cortex-A73 (Pixel 2):
Was
MobileNetV1/T:1/real_time 120831 us 120080 us 10 Freq=2.4576G
MobileNetV2/T:1/real_time 65185 us 64704 us 21 Freq=2.4576G
MobileNetV3Large/T:1/real_time 51117 us 50699 us 27 Freq=2.4576G
MobileNetV3Small/T:1/real_time 16777 us 16610 us 83 Freq=2.4576G
Now
MobileNetV1/T:1/real_time 116305 us 115524 us 11 Freq=2.4576G
MobileNetV2/T:1/real_time 64558 us 64091 us 22 Freq=2.4576G
MobileNetV3Large/T:1/real_time 51401 us 51020 us 27 Freq=2.4576G
MobileNetV3Small/T:1/real_time 16762 us 16630 us 84 Freq=2.4576G
PiperOrigin-RevId: 285288947
diff --git a/src/init.c b/src/init.c
index 5e9218f..98ffe80 100644
--- a/src/init.c
+++ b/src/init.c
@@ -144,6 +144,20 @@
.nr = 8,
};
break;
+
+ case cpuinfo_uarch_cortex_a57:
+ case cpuinfo_uarch_cortex_a72:
+ case cpuinfo_uarch_cortex_a73:
+ xnn_params.f32.gemm = (struct gemm_parameters) {
+ .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_pld_cortex_a75,
+ .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_4x8__neon_lane_ld128,
+ .gemm1 = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_1x8__neon_lane_ld64,
+ .igemm1 = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_1x8__neon_lane_ld64,
+ .mr = 4,
+ .nr = 8,
+ };
+ break;
+
default:
xnn_params.f32.gemm = (struct gemm_parameters) {
.gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75,