Remove padal from quantized microkernel names.

- PADAL is the only variant for these microkernels, so the suffix is redundant.
- Remove "padal" from file names and function names.

PiperOrigin-RevId: 409461961
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d9802cd..2ab805a 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1139,21 +1139,21 @@
   src/qc8-dwconv/gen/up8x25-minmax-fp32-neon-mla8-ld64.c
   src/qc8-dwconv/gen/up16x9-minmax-fp32-neon-mla8-ld64.c
   src/qc8-dwconv/gen/up16x25-minmax-fp32-neon-mla8-ld64.c
-  src/qc8-gemm/gen/1x8c2s4-minmax-fp32-neon-mlal-padal.c
-  src/qc8-gemm/gen/2x8c2s4-minmax-fp32-neon-mlal-padal.c
-  src/qc8-igemm/gen/1x8c2s4-minmax-fp32-neon-mlal-padal.c
-  src/qc8-igemm/gen/2x8c2s4-minmax-fp32-neon-mlal-padal.c
+  src/qc8-gemm/gen/1x8c2s4-minmax-fp32-neon-mlal.c
+  src/qc8-gemm/gen/2x8c2s4-minmax-fp32-neon-mlal.c
+  src/qc8-igemm/gen/1x8c2s4-minmax-fp32-neon-mlal.c
+  src/qc8-igemm/gen/2x8c2s4-minmax-fp32-neon-mlal.c
   src/qs8-dwconv/gen/up8x25-minmax-rndnu-neon-mla8-ld64.c
   src/qs8-dwconv/gen/up16x9-minmax-rndnu-neon-mla8-ld64.c
   src/qs8-dwconv/gen/up16x25-minmax-rndnu-neon-mla8-ld64.c
   src/qs8-gavgpool/gen/7p7x-minmax-neon-c8-acc2.c
   src/qs8-gavgpool/gen/7x-minmax-neon-c8-acc2.c
-  src/qs8-gemm/gen/1x8c2s4-minmax-rndnu-neon-mlal-padal.c
+  src/qs8-gemm/gen/1x8c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-gemm/gen/1x16-minmax-rndnu-neon-mlal-lane.c
-  src/qs8-gemm/gen/2x8c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/1x8c2s4-minmax-rndnu-neon-mlal-padal.c
+  src/qs8-gemm/gen/2x8c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/1x8c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-igemm/gen/1x16-minmax-rndnu-neon-mlal-lane.c
-  src/qs8-igemm/gen/2x8c2s4-minmax-rndnu-neon-mlal-padal.c
+  src/qs8-igemm/gen/2x8c2s4-minmax-rndnu-neon-mlal.c
   src/qs8-vadd/gen/minmax-neon-ld64-x16.c
   src/qs8-vadd/gen/minmax-neon-ld64-x32.c
   src/qs8-vaddc/gen/minmax-neon-ld64-x16.c
@@ -1507,29 +1507,29 @@
   src/qc8-dwconv/gen/up24x25-minmax-fp32-neon-mul16.c
   src/qc8-dwconv/gen/up32x9-minmax-fp32-neon-mul16.c
   src/qc8-dwconv/gen/up32x25-minmax-fp32-neon-mul16.c
-  src/qc8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-padal-dup.c
-  src/qc8-gemm/gen/1x8c2s4-minmax-fp32-neon-mlal-padal.c
-  src/qc8-gemm/gen/1x8c4-minmax-fp32-neon-mlal-padal-dup.c
-  src/qc8-gemm/gen/1x8c4s2-minmax-fp32-neon-mlal-padal.c
-  src/qc8-gemm/gen/1x8c8-minmax-fp32-neon-mlal-padal.c
+  src/qc8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qc8-gemm/gen/1x8c2s4-minmax-fp32-neon-mlal.c
+  src/qc8-gemm/gen/1x8c4-minmax-fp32-neon-mlal-dup.c
+  src/qc8-gemm/gen/1x8c4s2-minmax-fp32-neon-mlal.c
+  src/qc8-gemm/gen/1x8c8-minmax-fp32-neon-mlal.c
   src/qc8-gemm/gen/1x16-minmax-fp32-neon-mlal-lane.c
-  src/qc8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-padal-dup.c
-  src/qc8-gemm/gen/2x8c2s4-minmax-fp32-neon-mlal-padal.c
-  src/qc8-gemm/gen/2x8c4-minmax-fp32-neon-mlal-padal-dup.c
-  src/qc8-gemm/gen/2x8c4s2-minmax-fp32-neon-mlal-padal.c
-  src/qc8-gemm/gen/2x8c8-minmax-fp32-neon-mlal-padal.c
+  src/qc8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qc8-gemm/gen/2x8c2s4-minmax-fp32-neon-mlal.c
+  src/qc8-gemm/gen/2x8c4-minmax-fp32-neon-mlal-dup.c
+  src/qc8-gemm/gen/2x8c4s2-minmax-fp32-neon-mlal.c
+  src/qc8-gemm/gen/2x8c8-minmax-fp32-neon-mlal.c
   src/qc8-gemm/gen/4x16-minmax-fp32-neon-mlal-lane.c
-  src/qc8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-padal-dup.c
-  src/qc8-igemm/gen/1x8c2s4-minmax-fp32-neon-mlal-padal.c
-  src/qc8-igemm/gen/1x8c4-minmax-fp32-neon-mlal-padal-dup.c
-  src/qc8-igemm/gen/1x8c4s2-minmax-fp32-neon-mlal-padal.c
-  src/qc8-igemm/gen/1x8c8-minmax-fp32-neon-mlal-padal.c
+  src/qc8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qc8-igemm/gen/1x8c2s4-minmax-fp32-neon-mlal.c
+  src/qc8-igemm/gen/1x8c4-minmax-fp32-neon-mlal-dup.c
+  src/qc8-igemm/gen/1x8c4s2-minmax-fp32-neon-mlal.c
+  src/qc8-igemm/gen/1x8c8-minmax-fp32-neon-mlal.c
   src/qc8-igemm/gen/1x16-minmax-fp32-neon-mlal-lane.c
-  src/qc8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-padal-dup.c
-  src/qc8-igemm/gen/2x8c2s4-minmax-fp32-neon-mlal-padal.c
-  src/qc8-igemm/gen/2x8c4-minmax-fp32-neon-mlal-padal-dup.c
-  src/qc8-igemm/gen/2x8c4s2-minmax-fp32-neon-mlal-padal.c
-  src/qc8-igemm/gen/2x8c8-minmax-fp32-neon-mlal-padal.c
+  src/qc8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qc8-igemm/gen/2x8c2s4-minmax-fp32-neon-mlal.c
+  src/qc8-igemm/gen/2x8c4-minmax-fp32-neon-mlal-dup.c
+  src/qc8-igemm/gen/2x8c4s2-minmax-fp32-neon-mlal.c
+  src/qc8-igemm/gen/2x8c8-minmax-fp32-neon-mlal.c
   src/qc8-igemm/gen/4x16-minmax-fp32-neon-mlal-lane.c
   src/qs8-dwconv/gen/up8x9-minmax-fp32-neon-mul16.c
   src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-neon-mul16.c
@@ -1578,255 +1578,255 @@
   src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
   src/qs8-gemm/gen/1x8-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/1x8c2s4-minmax-fp32-neon-mlal-padal.c
-  src/qs8-gemm/gen/1x8c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/1x8c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/1x8c4-minmax-fp32-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/1x8c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/1x8c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/1x8c4s2-minmax-fp32-neon-mlal-padal.c
-  src/qs8-gemm/gen/1x8c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/1x8c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/1x8c8-minmax-fp32-neon-mlal-padal.c
-  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-gemm/gen/1x8c8-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-dup.c
+  src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/1x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/1x8c2s4-minmax-fp32-neon-mlal.c
+  src/qs8-gemm/gen/1x8c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/1x8c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/1x8c4-minmax-fp32-neon-mlal-dup.c
+  src/qs8-gemm/gen/1x8c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/1x8c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/1x8c4s2-minmax-fp32-neon-mlal.c
+  src/qs8-gemm/gen/1x8c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/1x8c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/1x8c8-minmax-fp32-neon-mlal.c
+  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-gemm/gen/1x8c8-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/1x16-minmax-fp32-neon-mlal-lane.c
   src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/1x16-minmax-rndnu-neon-mlal-lane-prfm.c
   src/qs8-gemm/gen/1x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-gemm/gen/1x16-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/1x16c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/1x16c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/1x16c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/1x16c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/1x16c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/1x16c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/1x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/1x16c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/1x16c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/1x16c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/1x16c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/1x16c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/1x16c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/2x8-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/2x8c2s4-minmax-fp32-neon-mlal-padal.c
-  src/qs8-gemm/gen/2x8c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/2x8c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/2x8c4-minmax-fp32-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/2x8c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/2x8c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/2x8c4s2-minmax-fp32-neon-mlal-padal.c
-  src/qs8-gemm/gen/2x8c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/2x8c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/2x8c8-minmax-fp32-neon-mlal-padal.c
-  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-gemm/gen/2x8c8-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-dup.c
+  src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/2x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/2x8c2s4-minmax-fp32-neon-mlal.c
+  src/qs8-gemm/gen/2x8c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/2x8c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/2x8c4-minmax-fp32-neon-mlal-dup.c
+  src/qs8-gemm/gen/2x8c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/2x8c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/2x8c4s2-minmax-fp32-neon-mlal.c
+  src/qs8-gemm/gen/2x8c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/2x8c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/2x8c8-minmax-fp32-neon-mlal.c
+  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-gemm/gen/2x8c8-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/2x16-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/2x16c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/2x16c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/2x16c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/2x16c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/2x16c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/2x16c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/2x16c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/2x16c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/2x16c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/2x16c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/2x16c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/2x16c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/3x8-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/3x8c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/3x8c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/3x8c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/3x8c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/3x8c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/3x8c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/3x8c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/3x8c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/3x8c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/3x8c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/3x8c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/3x8c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/3x16-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/3x16c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/3x16c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/3x16c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/3x16c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/3x16c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/3x16c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/3x16c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/3x16c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/3x16c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/3x16c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/3x16c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/3x16c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/4x8-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/4x8c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/4x8c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/4x8c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/4x8c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/4x8c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/4x8c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/4x8c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/4x8c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/4x8c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/4x8c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/4x8c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/4x8c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/4x16-minmax-fp32-neon-mlal-lane.c
   src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/4x16-minmax-rndnu-neon-mlal-lane-prfm.c
   src/qs8-gemm/gen/4x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-gemm/gen/4x16-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/4x16c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/4x16c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/4x16c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-gemm/gen/4x16c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-gemm/gen/4x16c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-gemm/gen/4x16c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/4x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/4x16c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/4x16c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/4x16c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-gemm/gen/4x16c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-gemm/gen/4x16c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-gemm/gen/4x16c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
   src/qs8-igemm/gen/1x8-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/1x8c2s4-minmax-fp32-neon-mlal-padal.c
-  src/qs8-igemm/gen/1x8c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/1x8c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/1x8c4-minmax-fp32-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/1x8c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/1x8c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/1x8c4s2-minmax-fp32-neon-mlal-padal.c
-  src/qs8-igemm/gen/1x8c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/1x8c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/1x8c8-minmax-fp32-neon-mlal-padal.c
-  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-igemm/gen/1x8c8-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-dup.c
+  src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/1x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/1x8c2s4-minmax-fp32-neon-mlal.c
+  src/qs8-igemm/gen/1x8c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/1x8c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/1x8c4-minmax-fp32-neon-mlal-dup.c
+  src/qs8-igemm/gen/1x8c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/1x8c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/1x8c4s2-minmax-fp32-neon-mlal.c
+  src/qs8-igemm/gen/1x8c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/1x8c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/1x8c8-minmax-fp32-neon-mlal.c
+  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-igemm/gen/1x8c8-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/1x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/1x16-minmax-fp32-neon-mlal-lane.c
   src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/1x16-minmax-rndnu-neon-mlal-lane-prfm.c
   src/qs8-igemm/gen/1x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/1x16-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/1x16c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/1x16c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/1x16c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/1x16c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/1x16c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/1x16c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/1x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/1x16c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/1x16c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/1x16c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/1x16c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/1x16c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/1x16c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/2x8-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/2x8c2s4-minmax-fp32-neon-mlal-padal.c
-  src/qs8-igemm/gen/2x8c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/2x8c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/2x8c4-minmax-fp32-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/2x8c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/2x8c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/2x8c4s2-minmax-fp32-neon-mlal-padal.c
-  src/qs8-igemm/gen/2x8c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/2x8c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/2x8c8-minmax-fp32-neon-mlal-padal.c
-  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-igemm/gen/2x8c8-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c
+  src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-dup.c
+  src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/2x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/2x8c2s4-minmax-fp32-neon-mlal.c
+  src/qs8-igemm/gen/2x8c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/2x8c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/2x8c4-minmax-fp32-neon-mlal-dup.c
+  src/qs8-igemm/gen/2x8c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/2x8c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/2x8c4s2-minmax-fp32-neon-mlal.c
+  src/qs8-igemm/gen/2x8c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/2x8c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/2x8c8-minmax-fp32-neon-mlal.c
+  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-igemm/gen/2x8c8-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/2x16-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/2x16c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/2x16c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/2x16c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/2x16c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/2x16c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/2x16c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/2x16c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/2x16c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/2x16c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/2x16c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/2x16c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/2x16c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/3x8-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/3x8c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/3x8c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/3x8c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/3x8c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/3x8c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/3x8c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/3x8c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/3x8c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/3x8c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/3x8c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/3x8c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/3x8c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/3x16-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/3x16c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/3x16c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/3x16c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/3x16c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/3x16c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/3x16c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/3x16c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/3x16c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/3x16c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/3x16c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/3x16c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/3x16c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/4x8-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/4x8c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/4x8c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/4x8c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/4x8c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/4x8c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/4x8c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-igemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/4x8c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/4x8c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/4x8c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/4x8c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/4x8c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/4x8c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-igemm/gen/4x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/4x16-minmax-fp32-neon-mlal-lane.c
   src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/4x16-minmax-rndnu-neon-mlal-lane-prfm.c
   src/qs8-igemm/gen/4x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/4x16-minmax-rndnu-neon-mull-addw-dup.c
-  src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/4x16c2s4-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/4x16c2s4-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/4x16c4-minmax-rndnu-neon-mlal-padal-dup.c
-  src/qs8-igemm/gen/4x16c4-minmax-rndnu-neon-mull-padal-dup.c
-  src/qs8-igemm/gen/4x16c4s2-minmax-rndnu-neon-mlal-padal.c
-  src/qs8-igemm/gen/4x16c4s2-minmax-rndnu-neon-mull-padal.c
-  src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
-  src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
-  src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
+  src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/4x16c2-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/4x16c2s4-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/4x16c2s4-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/4x16c4-minmax-rndnu-neon-mlal-dup.c
+  src/qs8-igemm/gen/4x16c4-minmax-rndnu-neon-mull-dup.c
+  src/qs8-igemm/gen/4x16c4s2-minmax-rndnu-neon-mlal.c
+  src/qs8-igemm/gen/4x16c4s2-minmax-rndnu-neon-mull.c
+  src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mlal.c
+  src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull.c
+  src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-requantization/fp32-neon.c
@@ -2325,17 +2325,17 @@
   src/qc8-dwconv/gen/up8x25-minmax-fp32-neonv8-mla8-ld64.c
   src/qc8-dwconv/gen/up16x9-minmax-fp32-neonv8-mla8-ld64.c
   src/qc8-dwconv/gen/up16x25-minmax-fp32-neonv8-mla8-ld64.c
-  src/qc8-gemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-gemm/gen/1x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qc8-gemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qc8-gemm/gen/1x8c8-minmax-fp32-neonv8-mlal.c
   src/qc8-gemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c
-  src/qc8-gemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-gemm/gen/2x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qc8-gemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qc8-gemm/gen/2x8c8-minmax-fp32-neonv8-mlal.c
   src/qc8-gemm/gen/4x16-minmax-fp32-neonv8-mlal-lane.c
-  src/qc8-igemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-igemm/gen/1x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qc8-igemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qc8-igemm/gen/1x8c8-minmax-fp32-neonv8-mlal.c
   src/qc8-igemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c
-  src/qc8-igemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-igemm/gen/2x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qc8-igemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qc8-igemm/gen/2x8c8-minmax-fp32-neonv8-mlal.c
   src/qc8-igemm/gen/4x16-minmax-fp32-neonv8-mlal-lane.c
   src/qs8-vmul/gen/minmax-fp32-neonv8-ld64-x16.c
   src/qs8-vmulc/gen/minmax-fp32-neonv8-ld64-x16.c
@@ -2375,29 +2375,29 @@
   src/qc8-dwconv/gen/up24x25-minmax-fp32-neonv8-mul16.c
   src/qc8-dwconv/gen/up32x9-minmax-fp32-neonv8-mul16.c
   src/qc8-dwconv/gen/up32x25-minmax-fp32-neonv8-mul16.c
-  src/qc8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qc8-gemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-gemm/gen/1x8c4-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qc8-gemm/gen/1x8c4s2-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-gemm/gen/1x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qc8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-gemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qc8-gemm/gen/1x8c4-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-gemm/gen/1x8c4s2-minmax-fp32-neonv8-mlal.c
+  src/qc8-gemm/gen/1x8c8-minmax-fp32-neonv8-mlal.c
   src/qc8-gemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c
-  src/qc8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qc8-gemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-gemm/gen/2x8c4-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qc8-gemm/gen/2x8c4s2-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-gemm/gen/2x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qc8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-gemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qc8-gemm/gen/2x8c4-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-gemm/gen/2x8c4s2-minmax-fp32-neonv8-mlal.c
+  src/qc8-gemm/gen/2x8c8-minmax-fp32-neonv8-mlal.c
   src/qc8-gemm/gen/4x16-minmax-fp32-neonv8-mlal-lane.c
-  src/qc8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qc8-igemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-igemm/gen/1x8c4-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qc8-igemm/gen/1x8c4s2-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-igemm/gen/1x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qc8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-igemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qc8-igemm/gen/1x8c4-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-igemm/gen/1x8c4s2-minmax-fp32-neonv8-mlal.c
+  src/qc8-igemm/gen/1x8c8-minmax-fp32-neonv8-mlal.c
   src/qc8-igemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c
-  src/qc8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qc8-igemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-igemm/gen/2x8c4-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qc8-igemm/gen/2x8c4s2-minmax-fp32-neonv8-mlal-padal.c
-  src/qc8-igemm/gen/2x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qc8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-igemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qc8-igemm/gen/2x8c4-minmax-fp32-neonv8-mlal-dup.c
+  src/qc8-igemm/gen/2x8c4s2-minmax-fp32-neonv8-mlal.c
+  src/qc8-igemm/gen/2x8c8-minmax-fp32-neonv8-mlal.c
   src/qc8-igemm/gen/4x16-minmax-fp32-neonv8-mlal-lane.c
   src/qs8-dwconv/gen/up8x9-minmax-fp32-neonv8-mul16.c
   src/qs8-dwconv/gen/up8x25-minmax-fp32-neonv8-mul16.c
@@ -2407,29 +2407,29 @@
   src/qs8-dwconv/gen/up24x25-minmax-fp32-neonv8-mul16.c
   src/qs8-dwconv/gen/up32x9-minmax-fp32-neonv8-mul16.c
   src/qs8-dwconv/gen/up32x25-minmax-fp32-neonv8-mul16.c
-  src/qs8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qs8-gemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qs8-gemm/gen/1x8c4-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qs8-gemm/gen/1x8c4s2-minmax-fp32-neonv8-mlal-padal.c
-  src/qs8-gemm/gen/1x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qs8-gemm/gen/1x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-gemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qs8-gemm/gen/1x8c4-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-gemm/gen/1x8c4s2-minmax-fp32-neonv8-mlal.c
+  src/qs8-gemm/gen/1x8c8-minmax-fp32-neonv8-mlal.c
   src/qs8-gemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c
-  src/qs8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qs8-gemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qs8-gemm/gen/2x8c4-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qs8-gemm/gen/2x8c4s2-minmax-fp32-neonv8-mlal-padal.c
-  src/qs8-gemm/gen/2x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qs8-gemm/gen/2x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-gemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qs8-gemm/gen/2x8c4-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-gemm/gen/2x8c4s2-minmax-fp32-neonv8-mlal.c
+  src/qs8-gemm/gen/2x8c8-minmax-fp32-neonv8-mlal.c
   src/qs8-gemm/gen/4x16-minmax-fp32-neonv8-mlal-lane.c
-  src/qs8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qs8-igemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qs8-igemm/gen/1x8c4-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qs8-igemm/gen/1x8c4s2-minmax-fp32-neonv8-mlal-padal.c
-  src/qs8-igemm/gen/1x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qs8-igemm/gen/1x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-igemm/gen/1x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qs8-igemm/gen/1x8c4-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-igemm/gen/1x8c4s2-minmax-fp32-neonv8-mlal.c
+  src/qs8-igemm/gen/1x8c8-minmax-fp32-neonv8-mlal.c
   src/qs8-igemm/gen/1x16-minmax-fp32-neonv8-mlal-lane.c
-  src/qs8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qs8-igemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal-padal.c
-  src/qs8-igemm/gen/2x8c4-minmax-fp32-neonv8-mlal-padal-dup.c
-  src/qs8-igemm/gen/2x8c4s2-minmax-fp32-neonv8-mlal-padal.c
-  src/qs8-igemm/gen/2x8c8-minmax-fp32-neonv8-mlal-padal.c
+  src/qs8-igemm/gen/2x8c2-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-igemm/gen/2x8c2s4-minmax-fp32-neonv8-mlal.c
+  src/qs8-igemm/gen/2x8c4-minmax-fp32-neonv8-mlal-dup.c
+  src/qs8-igemm/gen/2x8c4s2-minmax-fp32-neonv8-mlal.c
+  src/qs8-igemm/gen/2x8c8-minmax-fp32-neonv8-mlal.c
   src/qs8-igemm/gen/4x16-minmax-fp32-neonv8-mlal-lane.c
   src/qs8-vmul/gen/minmax-fp32-neonv8-ld64-x8.c
   src/qs8-vmul/gen/minmax-fp32-neonv8-ld64-x16.c
@@ -5028,74 +5028,74 @@
   src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-ld64.S
   src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-ld128.S
   src/f32-igemm/gen/6x8-minmax-aarch64-neonfma-prfm-cortex-a75.S
-  src/qc8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qc8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qc8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm.S
-  src/qc8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal.S
+  src/qc8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-cortex-a53.S
+  src/qc8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qc8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-prfm.S
+  src/qc8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal.S
   src/qc8-gemm/gen/1x16c4-minmax-fp32-aarch64-neondot-ld32.S
   src/qc8-gemm/gen/1x16c4-minmax-fp32-aarch64-neondot-ld64.S
-  src/qc8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qc8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qc8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm.S
-  src/qc8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal.S
-  src/qc8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mull-padal.S
-  src/qc8-gemm/gen/2x8c16-minmax-fp32-aarch64-neon-mlal-padal.S
+  src/qc8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-cortex-a53.S
+  src/qc8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qc8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-prfm.S
+  src/qc8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal.S
+  src/qc8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mull.S
+  src/qc8-gemm/gen/2x8c16-minmax-fp32-aarch64-neon-mlal.S
   src/qc8-gemm/gen/4x16-minmax-fp32-aarch64-neon-mlal-lane-cortex-a53.S
   src/qc8-gemm/gen/4x16-minmax-fp32-aarch64-neon-mlal-lane-prfm-cortex-a53.S
   src/qc8-gemm/gen/4x16c4-minmax-fp32-aarch64-neondot-cortex-a55.S
   src/qc8-gemm/gen/4x16c4-minmax-fp32-aarch64-neondot-ld32.S
   src/qc8-gemm/gen/4x16c4-minmax-fp32-aarch64-neondot-ld64.S
   src/qc8-gemm/gen/4x16c4-minmax-fp32-aarch64-neondot-ld128.S
-  src/qc8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qc8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qc8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm.S
-  src/qc8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal.S
-  src/qc8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qc8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qc8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm.S
-  src/qc8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal.S
-  src/qc8-igemm/gen/2x8c16-minmax-fp32-aarch64-neon-mlal-padal.S
+  src/qc8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-cortex-a53.S
+  src/qc8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qc8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-prfm.S
+  src/qc8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal.S
+  src/qc8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-cortex-a53.S
+  src/qc8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qc8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-prfm.S
+  src/qc8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal.S
+  src/qc8-igemm/gen/2x8c16-minmax-fp32-aarch64-neon-mlal.S
   src/qc8-igemm/gen/4x16-minmax-fp32-aarch64-neon-mlal-lane-cortex-a53.S
   src/qc8-igemm/gen/4x16-minmax-fp32-aarch64-neon-mlal-lane-prfm-cortex-a53.S
   src/qc8-igemm/gen/4x16c4-minmax-fp32-aarch64-neondot-cortex-a55.S
   src/qc8-igemm/gen/4x16c4-minmax-fp32-aarch64-neondot-ld64.S
   src/qc8-igemm/gen/4x16c4-minmax-fp32-aarch64-neondot-ld128.S
-  src/qs8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal.S
-  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
-  src/qs8-gemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-gemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-gemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-gemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-padal.S
+  src/qs8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-prfm.S
+  src/qs8-gemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal.S
+  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-prfm.S
+  src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal.S
+  src/qs8-gemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-gemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-gemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-prfm.S
+  src/qs8-gemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal.S
   src/qs8-gemm/gen/1x16c4-minmax-fp32-aarch64-neondot-ld32.S
   src/qs8-gemm/gen/1x16c4-minmax-fp32-aarch64-neondot-ld64.S
   src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
   src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
   src/qs8-gemm/gen/1x16c4-minmax-rndnu-aarch64-neondot-ld32.S
   src/qs8-gemm/gen/1x16c4-minmax-rndnu-aarch64-neondot-ld64.S
-  src/qs8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal.S
-  src/qs8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mull-padal.S
-  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
-  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mull-padal.S
-  src/qs8-gemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-gemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-gemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-gemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-padal.S
-  src/qs8-gemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mull-padal.S
-  src/qs8-gemm/gen/2x8c16-minmax-fp32-aarch64-neon-mlal-padal.S
-  src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S
-  src/qs8-gemm/gen/2x8c16-minmax-rndnu-aarch64-neon-mlal-padal.S
+  src/qs8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-prfm.S
+  src/qs8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal.S
+  src/qs8-gemm/gen/2x8c8-minmax-fp32-aarch64-neon-mull.S
+  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-prfm.S
+  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal.S
+  src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mull.S
+  src/qs8-gemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-gemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-gemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-prfm.S
+  src/qs8-gemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal.S
+  src/qs8-gemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mull.S
+  src/qs8-gemm/gen/2x8c16-minmax-fp32-aarch64-neon-mlal.S
+  src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-aarch64-neon-mlal.S
+  src/qs8-gemm/gen/2x8c16-minmax-rndnu-aarch64-neon-mlal.S
   src/qs8-gemm/gen/4x16-minmax-fp32-aarch64-neon-mlal-lane-cortex-a53.S
   src/qs8-gemm/gen/4x16-minmax-fp32-aarch64-neon-mlal-lane-prfm-cortex-a53.S
   src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S
@@ -5114,33 +5114,33 @@
   src/qs8-gemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld32.S
   src/qs8-gemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld64.S
   src/qs8-gemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld128.S
-  src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal.S
-  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
-  src/qs8-igemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-igemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-igemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-igemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-padal.S
-  src/qs8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-padal.S
-  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
-  src/qs8-igemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-padal-cortex-a53.S
-  src/qs8-igemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-padal-prfm-cortex-a53.S
-  src/qs8-igemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-padal-prfm.S
-  src/qs8-igemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-padal.S
-  src/qs8-igemm/gen/2x8c16-minmax-fp32-aarch64-neon-mlal-padal.S
-  src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S
-  src/qs8-igemm/gen/2x8c16-minmax-rndnu-aarch64-neon-mlal-padal.S
+  src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-prfm.S
+  src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal.S
+  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-prfm.S
+  src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal.S
+  src/qs8-igemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-igemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-igemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal-prfm.S
+  src/qs8-igemm/gen/1x8c8-minmax-rndnu-aarch64-neon-mlal.S
+  src/qs8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal-prfm.S
+  src/qs8-igemm/gen/2x8c8-minmax-fp32-aarch64-neon-mlal.S
+  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-prfm.S
+  src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal.S
+  src/qs8-igemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-cortex-a53.S
+  src/qs8-igemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-prfm-cortex-a53.S
+  src/qs8-igemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal-prfm.S
+  src/qs8-igemm/gen/2x8c8-minmax-rndnu-aarch64-neon-mlal.S
+  src/qs8-igemm/gen/2x8c16-minmax-fp32-aarch64-neon-mlal.S
+  src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-aarch64-neon-mlal.S
+  src/qs8-igemm/gen/2x8c16-minmax-rndnu-aarch64-neon-mlal.S
   src/qs8-igemm/gen/4x16-minmax-fp32-aarch64-neon-mlal-lane-cortex-a53.S
   src/qs8-igemm/gen/4x16-minmax-fp32-aarch64-neon-mlal-lane-prfm-cortex-a53.S
   src/qs8-igemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S