RNDNU versions of all Neon lane microkernels.

- For all gemmlowp Neon MLAL lane microkernels (QS8 GEMM and IGEMM), add an rndnu version, in both plain and `prfm` variants.

PiperOrigin-RevId: 412148785
diff --git a/BUILD.bazel b/BUILD.bazel
index 7395758..dbacef4 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -2588,6 +2588,8 @@
     "src/qs8-gavgpool/gen/7x-minmax-neon-c32-acc2.c",
     "src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c",
     "src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c",
+    "src/qs8-gemm/gen/1x8-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-gemm/gen/1x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-gemm/gen/1x8-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c",
     "src/qs8-gemm/gen/1x8c2-minmax-fp32-neon-mlal-ld1r.c",
@@ -2649,6 +2651,8 @@
     "src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull.c",
     "src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-gemm/gen/2x8-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-gemm/gen/2x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-gemm/gen/2x8-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c",
     "src/qs8-gemm/gen/2x8c2-minmax-fp32-neon-mlal-ld1r.c",
@@ -2684,6 +2688,8 @@
     "src/qs8-gemm/gen/2x8c8-minmax-rndnu-neon-mlal.c",
     "src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-gemm/gen/2x16-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-gemm/gen/2x16-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-gemm/gen/2x16-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mlal-dup.c",
     "src/qs8-gemm/gen/2x16c2-minmax-rndnu-neon-mlal-ld1r.c",
@@ -2707,6 +2713,8 @@
     "src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull.c",
     "src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-gemm/gen/3x8-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-gemm/gen/3x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-gemm/gen/3x8-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mlal-dup.c",
     "src/qs8-gemm/gen/3x8c2-minmax-rndnu-neon-mlal-ld1r.c",
@@ -2730,6 +2738,8 @@
     "src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull.c",
     "src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-gemm/gen/3x16-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-gemm/gen/3x16-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-gemm/gen/3x16-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mlal-dup.c",
     "src/qs8-gemm/gen/3x16c2-minmax-rndnu-neon-mlal-ld1r.c",
@@ -2753,6 +2763,8 @@
     "src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull.c",
     "src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-gemm/gen/4x8-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-gemm/gen/4x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-gemm/gen/4x8-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mlal-dup.c",
     "src/qs8-gemm/gen/4x8c2-minmax-rndnu-neon-mlal-ld1r.c",
@@ -2802,9 +2814,15 @@
     "src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull.c",
     "src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-gemm/gen/6x8-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-gemm/gen/6x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-gemm/gen/6x16-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-gemm/gen/6x16-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c",
     "src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c",
+    "src/qs8-igemm/gen/1x8-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-igemm/gen/1x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-igemm/gen/1x8-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-dup.c",
     "src/qs8-igemm/gen/1x8c2-minmax-fp32-neon-mlal-ld1r.c",
@@ -2866,6 +2884,8 @@
     "src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull.c",
     "src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-igemm/gen/2x8-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-igemm/gen/2x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-igemm/gen/2x8-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-dup.c",
     "src/qs8-igemm/gen/2x8c2-minmax-fp32-neon-mlal-ld1r.c",
@@ -2901,6 +2921,8 @@
     "src/qs8-igemm/gen/2x8c8-minmax-rndnu-neon-mlal.c",
     "src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-igemm/gen/2x16-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-igemm/gen/2x16-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-igemm/gen/2x16-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mlal-dup.c",
     "src/qs8-igemm/gen/2x16c2-minmax-rndnu-neon-mlal-ld1r.c",
@@ -2924,6 +2946,8 @@
     "src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull.c",
     "src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-igemm/gen/3x8-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-igemm/gen/3x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-igemm/gen/3x8-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mlal-dup.c",
     "src/qs8-igemm/gen/3x8c2-minmax-rndnu-neon-mlal-ld1r.c",
@@ -2947,6 +2971,8 @@
     "src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull.c",
     "src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-igemm/gen/3x16-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-igemm/gen/3x16-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-igemm/gen/3x16-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mlal-dup.c",
     "src/qs8-igemm/gen/3x16c2-minmax-rndnu-neon-mlal-ld1r.c",
@@ -2970,6 +2996,8 @@
     "src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull.c",
     "src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-igemm/gen/4x8-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-igemm/gen/4x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-igemm/gen/4x8-minmax-rndnu-neon-mull-addw-dup.c",
     "src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mlal-dup.c",
     "src/qs8-igemm/gen/4x8c2-minmax-rndnu-neon-mlal-ld1r.c",
@@ -3019,7 +3047,11 @@
     "src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull.c",
     "src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal.c",
     "src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-igemm/gen/6x8-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-igemm/gen/6x8-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c",
+    "src/qs8-igemm/gen/6x16-minmax-rndnu-neon-mlal-lane-prfm.c",
+    "src/qs8-igemm/gen/6x16-minmax-rndnu-neon-mlal-lane.c",
     "src/qs8-requantization/fp32-neon.c",
     "src/qs8-requantization/gemmlowp-neon.c",
     "src/qs8-requantization/rndna-neon.c",