1x8 LD64 F32 GEMM
Add a simplified 1x8 float GEMM LD64 microkernel.
Also clean up prefetches in the A75 kernel.
PiperOrigin-RevId: 305903285
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c948c5..d25b812 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1443,6 +1443,7 @@
src/f16-gemm/gen-inc/6x16inc-minmax-aarch64-neonfp16arith-ld32.S
src/f32-dwconv/up4x9-minmax-aarch64-neonfma-cortex-a55.S
src/f32-dwconv/up4x9-minmax-aarch64-neonfma.S
+ src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-ld64.S
src/f32-gemm/gen/1x12-minmax-aarch64-neonfma-cortex-a53.S
src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-cortex-a53.S
src/f32-gemm/gen/1x8-minmax-aarch64-neonfma-cortex-a57.S
@@ -1464,6 +1465,7 @@
src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-ios.S
src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-ld128.S
src/f32-gemm/gen/6x8-minmax-aarch64-neonfma-ld64.S
+ src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-ld64.S
src/f32-gemm/gen-inc/1x12inc-minmax-aarch64-neonfma-cortex-a53.S
src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-cortex-a53.S
src/f32-gemm/gen-inc/1x8inc-minmax-aarch64-neonfma-cortex-a57.S