Add RNDNU versions of all Neon mlal-lane microkernels.

- For each gemmlowp Neon mlal-lane microkernel (qs8-gemm and qs8-igemm), add rndnu versions, both plain and with the -prfm suffix.

PiperOrigin-RevId: 412148785
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 931cb47..9456659 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1609,6 +1609,8 @@
   src/qs8-gavgpool/gen/7x-minmax-neon-c32-acc2.c
   src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
+  src/qs8-gemm/gen/1x8-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-gemm/gen/1x8-minmax-rndnu-neon-mlal-lane.c
   src/qs8-gemm/gen/1x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c
   src/qs8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-ld1r.c
@@ -1670,6 +1672,8 @@
   src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull.c
   src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-gemm/gen/2x8-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-gemm/gen/2x8-minmax-rndnu-neon-mlal-lane.c
   src/qs8-gemm/gen/2x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c
   src/qs8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-ld1r.c
@@ -1705,6 +1709,8 @@
   src/qs8-gemm/gen/2x8c8-minmax-rndnu-neon-mlal.c
   src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-gemm/gen/2x16-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-gemm/gen/2x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-gemm/gen/2x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mlal-dup.c
   src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mlal-ld1r.c
@@ -1728,6 +1734,8 @@
   src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull.c
   src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-gemm/gen/3x8-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-gemm/gen/3x8-minmax-rndnu-neon-mlal-lane.c
   src/qs8-gemm/gen/3x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mlal-dup.c
   src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mlal-ld1r.c
@@ -1751,6 +1759,8 @@
   src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull.c
   src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-gemm/gen/3x16-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-gemm/gen/3x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-gemm/gen/3x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mlal-dup.c
   src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mlal-ld1r.c
@@ -1774,6 +1784,8 @@
   src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull.c
   src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-gemm/gen/4x8-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-gemm/gen/4x8-minmax-rndnu-neon-mlal-lane.c
   src/qs8-gemm/gen/4x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mlal-dup.c
   src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mlal-ld1r.c
@@ -1823,9 +1835,15 @@
   src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull.c
   src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-gemm/gen/6x8-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-gemm/gen/6x8-minmax-rndnu-neon-mlal-lane.c
   src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-gemm/gen/6x16-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-gemm/gen/6x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
   src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
+  src/qs8-igemm/gen/1x8-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-igemm/gen/1x8-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/1x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c
   src/qs8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-ld1r.c
@@ -1887,6 +1905,8 @@
   src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull.c
   src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-igemm/gen/2x8-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-igemm/gen/2x8-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/2x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c
   src/qs8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-ld1r.c
@@ -1922,6 +1942,8 @@
   src/qs8-igemm/gen/2x8c8-minmax-rndnu-neon-mlal.c
   src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-igemm/gen/2x16-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-igemm/gen/2x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/2x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mlal-dup.c
   src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mlal-ld1r.c
@@ -1945,6 +1967,8 @@
   src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull.c
   src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-igemm/gen/3x8-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-igemm/gen/3x8-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/3x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mlal-dup.c
   src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mlal-ld1r.c
@@ -1968,6 +1992,8 @@
   src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull.c
   src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-igemm/gen/3x16-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-igemm/gen/3x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/3x16-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mlal-dup.c
   src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mlal-ld1r.c
@@ -1991,6 +2017,8 @@
   src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull.c
   src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-igemm/gen/4x8-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-igemm/gen/4x8-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/4x8-minmax-rndnu-neon-mull-addw-dup.c
   src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mlal-dup.c
   src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mlal-ld1r.c
@@ -2040,7 +2068,11 @@
   src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull.c
   src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal.c
   src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-igemm/gen/6x8-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-igemm/gen/6x8-minmax-rndnu-neon-mlal-lane.c
   src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
+  src/qs8-igemm/gen/6x16-minmax-rndnu-neon-mlal-lane-prfm.c
+  src/qs8-igemm/gen/6x16-minmax-rndnu-neon-mlal-lane.c
   src/qs8-requantization/fp32-neon.c
   src/qs8-requantization/gemmlowp-neon.c
   src/qs8-requantization/rndna-neon.c