Add LD1R and LD2R variants of the c2 microkernels

- Instead of 1 LD1 followed by 4 DUP instructions, use 4 LD1R or 2 LD2R (load-and-replicate) instructions

PiperOrigin-RevId: 410613731
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 809f82e..0882f50 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1508,6 +1508,8 @@
   src/qc8-dwconv/gen/up32x9-minmax-fp32-neon-mul16.c
   src/qc8-dwconv/gen/up32x25-minmax-fp32-neon-mul16.c
   src/qc8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qc8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-ld1r.c
+  src/qc8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-ld2r.c
   src/qc8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-ld4r.c
   src/qc8-gemm/gen/1x8c2s4-minmax-fp32-neon-mlal.c
   src/qc8-gemm/gen/1x8c4-minmax-fp32-neon-mlal-dup.c
@@ -1515,6 +1517,8 @@
   src/qc8-gemm/gen/1x8c8-minmax-fp32-neon-mlal.c
   src/qc8-gemm/gen/1x16-minmax-fp32-neon-mlal-lane.c
   src/qc8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qc8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-ld1r.c
+  src/qc8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-ld2r.c
   src/qc8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-ld4r.c
   src/qc8-gemm/gen/2x8c2s4-minmax-fp32-neon-mlal.c
   src/qc8-gemm/gen/2x8c4-minmax-fp32-neon-mlal-dup.c
@@ -1522,6 +1526,8 @@
   src/qc8-gemm/gen/2x8c8-minmax-fp32-neon-mlal.c
   src/qc8-gemm/gen/4x16-minmax-fp32-neon-mlal-lane.c
   src/qc8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qc8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-ld1r.c
+  src/qc8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-ld2r.c
   src/qc8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-ld4r.c
   src/qc8-igemm/gen/1x8c2s4-minmax-fp32-neon-mlal.c
   src/qc8-igemm/gen/1x8c4-minmax-fp32-neon-mlal-dup.c
@@ -1529,6 +1535,8 @@
   src/qc8-igemm/gen/1x8c8-minmax-fp32-neon-mlal.c
   src/qc8-igemm/gen/1x16-minmax-fp32-neon-mlal-lane.c
   src/qc8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qc8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-ld1r.c
+  src/qc8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-ld2r.c
   src/qc8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-ld4r.c
   src/qc8-igemm/gen/2x8c2s4-minmax-fp32-neon-mlal.c
   src/qc8-igemm/gen/2x8c4-minmax-fp32-neon-mlal-dup.c
@@ -1583,11 +1591,17 @@
   src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
   src/qs8-gemm/gen/1x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qs8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-ld1r.c
+  src/qs8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-ld2r.c
   src/qs8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-ld4r.c
   src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-dup.c
   src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-gemm/gen/1x8c2s4-minmax-fp32-neon-mlal.c
   src/qs8-gemm/gen/1x8c2s4-minmax-rndnu-neon-mlal.c
@@ -1609,8 +1623,12 @@
   src/qs8-gemm/gen/1x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-gemm/gen/1x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-gemm/gen/1x16c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-gemm/gen/1x16c2s4-minmax-rndnu-neon-mull.c
@@ -1624,11 +1642,17 @@
   src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/2x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qs8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-ld1r.c
+  src/qs8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-ld2r.c
   src/qs8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-ld4r.c
   src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-dup.c
   src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-gemm/gen/2x8c2s4-minmax-fp32-neon-mlal.c
   src/qs8-gemm/gen/2x8c2s4-minmax-rndnu-neon-mlal.c
@@ -1647,8 +1671,12 @@
   src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/2x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-gemm/gen/2x16c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-gemm/gen/2x16c2s4-minmax-rndnu-neon-mull.c
@@ -1662,8 +1690,12 @@
   src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/3x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-gemm/gen/3x8c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-gemm/gen/3x8c2s4-minmax-rndnu-neon-mull.c
@@ -1677,8 +1709,12 @@
   src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/3x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-gemm/gen/3x16c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-gemm/gen/3x16c2s4-minmax-rndnu-neon-mull.c
@@ -1692,8 +1728,12 @@
   src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/4x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-gemm/gen/4x8c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-gemm/gen/4x8c2s4-minmax-rndnu-neon-mull.c
@@ -1710,8 +1750,12 @@
   src/qs8-gemm/gen/4x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-gemm/gen/4x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-gemm/gen/4x16c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-gemm/gen/4x16c2s4-minmax-rndnu-neon-mull.c
@@ -1728,11 +1772,17 @@
   src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
   src/qs8-igemm/gen/1x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qs8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-ld1r.c
+  src/qs8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-ld2r.c
   src/qs8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-ld4r.c
   src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-dup.c
   src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-igemm/gen/1x8c2s4-minmax-fp32-neon-mlal.c
   src/qs8-igemm/gen/1x8c2s4-minmax-rndnu-neon-mlal.c
@@ -1754,8 +1804,12 @@
   src/qs8-igemm/gen/1x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/1x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-igemm/gen/1x16c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-igemm/gen/1x16c2s4-minmax-rndnu-neon-mull.c
@@ -1769,11 +1823,17 @@
   src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/2x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qs8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-ld1r.c
+  src/qs8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-ld2r.c
   src/qs8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-ld4r.c
   src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-dup.c
   src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-igemm/gen/2x8c2s4-minmax-fp32-neon-mlal.c
   src/qs8-igemm/gen/2x8c2s4-minmax-rndnu-neon-mlal.c
@@ -1792,8 +1852,12 @@
   src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/2x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-igemm/gen/2x16c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-igemm/gen/2x16c2s4-minmax-rndnu-neon-mull.c
@@ -1807,8 +1871,12 @@
   src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/3x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-igemm/gen/3x8c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-igemm/gen/3x8c2s4-minmax-rndnu-neon-mull.c
@@ -1822,8 +1890,12 @@
   src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/3x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-igemm/gen/3x16c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-igemm/gen/3x16c2s4-minmax-rndnu-neon-mull.c
@@ -1837,8 +1909,12 @@
   src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/4x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-igemm/gen/4x8c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-igemm/gen/4x8c2s4-minmax-rndnu-neon-mull.c
@@ -1855,8 +1931,12 @@
   src/qs8-igemm/gen/4x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/4x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mlal-ld1r.c
+  src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mlal-ld2r.c
   src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mlal-ld4r.c
   src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mull-ld1r.c
+  src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mull-ld2r.c
   src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mull-ld4r.c
   src/qs8-igemm/gen/4x16c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-igemm/gen/4x16c2s4-minmax-rndnu-neon-mull.c
@@ -2416,6 +2496,8 @@
   src/qc8-dwconv/gen/up32x9-minmax-fp32-neonv8-mul16.c
   src/qc8-dwconv/gen/up32x25-minmax-fp32-neonv8-mul16.c
   src/qc8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld1r.c
+  src/qc8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld2r.c
   src/qc8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld4r.c
   src/qc8-gemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal.c
   src/qc8-gemm/gen/1x8c4-minmax-fp32-neonv8-mlal-dup.c
@@ -2423,6 +2505,8 @@
   src/qc8-gemm/gen/1x8c8-minmax-fp32-neonv8-mlal.c
   src/qc8-gemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c
   src/qc8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld1r.c
+  src/qc8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld2r.c
   src/qc8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld4r.c
   src/qc8-gemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal.c
   src/qc8-gemm/gen/2x8c4-minmax-fp32-neonv8-mlal-dup.c
@@ -2430,6 +2514,8 @@
   src/qc8-gemm/gen/2x8c8-minmax-fp32-neonv8-mlal.c
   src/qc8-gemm/gen/4x16-minmax-fp32-neonv8-mlal-lane.c
   src/qc8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld1r.c
+  src/qc8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld2r.c
   src/qc8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld4r.c
   src/qc8-igemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal.c
   src/qc8-igemm/gen/1x8c4-minmax-fp32-neonv8-mlal-dup.c
@@ -2437,6 +2523,8 @@
   src/qc8-igemm/gen/1x8c8-minmax-fp32-neonv8-mlal.c
   src/qc8-igemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c
   src/qc8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld1r.c
+  src/qc8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld2r.c
   src/qc8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld4r.c
   src/qc8-igemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal.c
   src/qc8-igemm/gen/2x8c4-minmax-fp32-neonv8-mlal-dup.c
@@ -2452,6 +2540,8 @@
   src/qs8-dwconv/gen/up32x9-minmax-fp32-neonv8-mul16.c
   src/qs8-dwconv/gen/up32x25-minmax-fp32-neonv8-mul16.c
   src/qs8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld1r.c
+  src/qs8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld2r.c
   src/qs8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld4r.c
   src/qs8-gemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal.c
   src/qs8-gemm/gen/1x8c4-minmax-fp32-neonv8-mlal-dup.c
@@ -2459,6 +2549,8 @@
   src/qs8-gemm/gen/1x8c8-minmax-fp32-neonv8-mlal.c
   src/qs8-gemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c
   src/qs8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld1r.c
+  src/qs8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld2r.c
   src/qs8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld4r.c
   src/qs8-gemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal.c
   src/qs8-gemm/gen/2x8c4-minmax-fp32-neonv8-mlal-dup.c
@@ -2466,6 +2558,8 @@
   src/qs8-gemm/gen/2x8c8-minmax-fp32-neonv8-mlal.c
   src/qs8-gemm/gen/4x16-minmax-fp32-neonv8-mlal-lane.c
   src/qs8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld1r.c
+  src/qs8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld2r.c
   src/qs8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-ld4r.c
   src/qs8-igemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal.c
   src/qs8-igemm/gen/1x8c4-minmax-fp32-neonv8-mlal-dup.c
@@ -2473,6 +2567,8 @@
   src/qs8-igemm/gen/1x8c8-minmax-fp32-neonv8-mlal.c
   src/qs8-igemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c
   src/qs8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld1r.c
+  src/qs8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld2r.c
   src/qs8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-ld4r.c
   src/qs8-igemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal.c
   src/qs8-igemm/gen/2x8c4-minmax-fp32-neonv8-mlal-dup.c