Add prefetch to QS8 GEMM/IGEMM NEON MLAL lane microkernels

PiperOrigin-RevId: 369516529
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4dcedc2..f53b62c 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -944,6 +944,7 @@
   src/qs8-gavgpool/gen/7x-minmax-neon-c16-acc2.c
   src/qs8-gavgpool/gen/7x-minmax-neon-c24-acc2.c
   src/qs8-gavgpool/gen/7x-minmax-neon-c32-acc2.c
+  src/qs8-gemm/gen/1x8-minmax-neon-mlal-lane-prfm.c
   src/qs8-gemm/gen/1x8-minmax-neon-mlal-lane.c
   src/qs8-gemm/gen/1x8-minmax-neon-mull-addw-dup.c
   src/qs8-gemm/gen/1x8c2-minmax-neon-mlal-padal-dup.c
@@ -951,6 +952,7 @@
   src/qs8-gemm/gen/1x8c8-minmax-neon-mlal-padal.c
   src/qs8-gemm/gen/1x8c8-minmax-neon-mull-padal.c
   src/qs8-gemm/gen/1x8c16-minmax-neon-mlal-padal.c
+  src/qs8-gemm/gen/1x16-minmax-neon-mlal-lane-prfm.c
   src/qs8-gemm/gen/1x16-minmax-neon-mlal-lane.c
   src/qs8-gemm/gen/1x16-minmax-neon-mull-addw-dup.c
   src/qs8-gemm/gen/1x16c2-minmax-neon-mlal-padal-dup.c
@@ -958,6 +960,7 @@
   src/qs8-gemm/gen/1x16c8-minmax-neon-mlal-padal.c
   src/qs8-gemm/gen/1x16c8-minmax-neon-mull-padal.c
   src/qs8-gemm/gen/1x16c16-minmax-neon-mlal-padal.c
+  src/qs8-gemm/gen/2x8-minmax-neon-mlal-lane-prfm.c
   src/qs8-gemm/gen/2x8-minmax-neon-mlal-lane.c
   src/qs8-gemm/gen/2x8-minmax-neon-mull-addw-dup.c
   src/qs8-gemm/gen/2x8c2-minmax-neon-mlal-padal-dup.c
@@ -965,6 +968,7 @@
   src/qs8-gemm/gen/2x8c8-minmax-neon-mlal-padal.c
   src/qs8-gemm/gen/2x8c8-minmax-neon-mull-padal.c
   src/qs8-gemm/gen/2x8c16-minmax-neon-mlal-padal.c
+  src/qs8-gemm/gen/2x16-minmax-neon-mlal-lane-prfm.c
   src/qs8-gemm/gen/2x16-minmax-neon-mlal-lane.c
   src/qs8-gemm/gen/2x16-minmax-neon-mull-addw-dup.c
   src/qs8-gemm/gen/2x16c2-minmax-neon-mlal-padal-dup.c
@@ -972,6 +976,7 @@
   src/qs8-gemm/gen/2x16c8-minmax-neon-mlal-padal.c
   src/qs8-gemm/gen/2x16c8-minmax-neon-mull-padal.c
   src/qs8-gemm/gen/2x16c16-minmax-neon-mlal-padal.c
+  src/qs8-gemm/gen/3x8-minmax-neon-mlal-lane-prfm.c
   src/qs8-gemm/gen/3x8-minmax-neon-mlal-lane.c
   src/qs8-gemm/gen/3x8-minmax-neon-mull-addw-dup.c
   src/qs8-gemm/gen/3x8c2-minmax-neon-mlal-padal-dup.c
@@ -979,6 +984,7 @@
   src/qs8-gemm/gen/3x8c8-minmax-neon-mlal-padal.c
   src/qs8-gemm/gen/3x8c8-minmax-neon-mull-padal.c
   src/qs8-gemm/gen/3x8c16-minmax-neon-mlal-padal.c
+  src/qs8-gemm/gen/3x16-minmax-neon-mlal-lane-prfm.c
   src/qs8-gemm/gen/3x16-minmax-neon-mlal-lane.c
   src/qs8-gemm/gen/3x16-minmax-neon-mull-addw-dup.c
   src/qs8-gemm/gen/3x16c2-minmax-neon-mlal-padal-dup.c
@@ -986,6 +992,7 @@
   src/qs8-gemm/gen/3x16c8-minmax-neon-mlal-padal.c
   src/qs8-gemm/gen/3x16c8-minmax-neon-mull-padal.c
   src/qs8-gemm/gen/3x16c16-minmax-neon-mlal-padal.c
+  src/qs8-gemm/gen/4x8-minmax-neon-mlal-lane-prfm.c
   src/qs8-gemm/gen/4x8-minmax-neon-mlal-lane.c
   src/qs8-gemm/gen/4x8-minmax-neon-mull-addw-dup.c
   src/qs8-gemm/gen/4x8c2-minmax-neon-mlal-padal-dup.c
@@ -993,6 +1000,7 @@
   src/qs8-gemm/gen/4x8c8-minmax-neon-mlal-padal.c
   src/qs8-gemm/gen/4x8c8-minmax-neon-mull-padal.c
   src/qs8-gemm/gen/4x8c16-minmax-neon-mlal-padal.c
+  src/qs8-gemm/gen/4x16-minmax-neon-mlal-lane-prfm.c
   src/qs8-gemm/gen/4x16-minmax-neon-mlal-lane.c
   src/qs8-gemm/gen/4x16-minmax-neon-mull-addw-dup.c
   src/qs8-gemm/gen/4x16c2-minmax-neon-mlal-padal-dup.c
@@ -1000,6 +1008,7 @@
   src/qs8-gemm/gen/4x16c8-minmax-neon-mlal-padal.c
   src/qs8-gemm/gen/4x16c8-minmax-neon-mull-padal.c
   src/qs8-gemm/gen/4x16c16-minmax-neon-mlal-padal.c
+  src/qs8-igemm/gen/1x8-minmax-neon-mlal-lane-prfm.c
   src/qs8-igemm/gen/1x8-minmax-neon-mlal-lane.c
   src/qs8-igemm/gen/1x8-minmax-neon-mull-addw-dup.c
   src/qs8-igemm/gen/1x8c2-minmax-neon-mlal-padal-dup.c
@@ -1007,6 +1016,7 @@
   src/qs8-igemm/gen/1x8c8-minmax-neon-mlal-padal.c
   src/qs8-igemm/gen/1x8c8-minmax-neon-mull-padal.c
   src/qs8-igemm/gen/1x8c16-minmax-neon-mlal-padal.c
+  src/qs8-igemm/gen/1x16-minmax-neon-mlal-lane-prfm.c
   src/qs8-igemm/gen/1x16-minmax-neon-mlal-lane.c
   src/qs8-igemm/gen/1x16-minmax-neon-mull-addw-dup.c
   src/qs8-igemm/gen/1x16c2-minmax-neon-mlal-padal-dup.c
@@ -1014,6 +1024,7 @@
   src/qs8-igemm/gen/1x16c8-minmax-neon-mlal-padal.c
   src/qs8-igemm/gen/1x16c8-minmax-neon-mull-padal.c
   src/qs8-igemm/gen/1x16c16-minmax-neon-mlal-padal.c
+  src/qs8-igemm/gen/2x8-minmax-neon-mlal-lane-prfm.c
   src/qs8-igemm/gen/2x8-minmax-neon-mlal-lane.c
   src/qs8-igemm/gen/2x8-minmax-neon-mull-addw-dup.c
   src/qs8-igemm/gen/2x8c2-minmax-neon-mlal-padal-dup.c
@@ -1021,6 +1032,7 @@
   src/qs8-igemm/gen/2x8c8-minmax-neon-mlal-padal.c
   src/qs8-igemm/gen/2x8c8-minmax-neon-mull-padal.c
   src/qs8-igemm/gen/2x8c16-minmax-neon-mlal-padal.c
+  src/qs8-igemm/gen/2x16-minmax-neon-mlal-lane-prfm.c
   src/qs8-igemm/gen/2x16-minmax-neon-mlal-lane.c
   src/qs8-igemm/gen/2x16-minmax-neon-mull-addw-dup.c
   src/qs8-igemm/gen/2x16c2-minmax-neon-mlal-padal-dup.c
@@ -1028,6 +1040,7 @@
   src/qs8-igemm/gen/2x16c8-minmax-neon-mlal-padal.c
   src/qs8-igemm/gen/2x16c8-minmax-neon-mull-padal.c
   src/qs8-igemm/gen/2x16c16-minmax-neon-mlal-padal.c
+  src/qs8-igemm/gen/3x8-minmax-neon-mlal-lane-prfm.c
   src/qs8-igemm/gen/3x8-minmax-neon-mlal-lane.c
   src/qs8-igemm/gen/3x8-minmax-neon-mull-addw-dup.c
   src/qs8-igemm/gen/3x8c2-minmax-neon-mlal-padal-dup.c
@@ -1035,6 +1048,7 @@
   src/qs8-igemm/gen/3x8c8-minmax-neon-mlal-padal.c
   src/qs8-igemm/gen/3x8c8-minmax-neon-mull-padal.c
   src/qs8-igemm/gen/3x8c16-minmax-neon-mlal-padal.c
+  src/qs8-igemm/gen/3x16-minmax-neon-mlal-lane-prfm.c
   src/qs8-igemm/gen/3x16-minmax-neon-mlal-lane.c
   src/qs8-igemm/gen/3x16-minmax-neon-mull-addw-dup.c
   src/qs8-igemm/gen/3x16c2-minmax-neon-mlal-padal-dup.c
@@ -1042,6 +1056,7 @@
   src/qs8-igemm/gen/3x16c8-minmax-neon-mlal-padal.c
   src/qs8-igemm/gen/3x16c8-minmax-neon-mull-padal.c
   src/qs8-igemm/gen/3x16c16-minmax-neon-mlal-padal.c
+  src/qs8-igemm/gen/4x8-minmax-neon-mlal-lane-prfm.c
   src/qs8-igemm/gen/4x8-minmax-neon-mlal-lane.c
   src/qs8-igemm/gen/4x8-minmax-neon-mull-addw-dup.c
   src/qs8-igemm/gen/4x8c2-minmax-neon-mlal-padal-dup.c
@@ -1049,6 +1064,7 @@
   src/qs8-igemm/gen/4x8c8-minmax-neon-mlal-padal.c
   src/qs8-igemm/gen/4x8c8-minmax-neon-mull-padal.c
   src/qs8-igemm/gen/4x8c16-minmax-neon-mlal-padal.c
+  src/qs8-igemm/gen/4x16-minmax-neon-mlal-lane-prfm.c
   src/qs8-igemm/gen/4x16-minmax-neon-mlal-lane.c
   src/qs8-igemm/gen/4x16-minmax-neon-mull-addw-dup.c
   src/qs8-igemm/gen/4x16c2-minmax-neon-mlal-padal-dup.c