4x16c4 RNDNU quantized Neon assembly GEMM/IGEMM microkernel.
- Was gemmlowp with BIC, SQRDMULH, SSRA, SRSHL
- Now rndnu with SSHL, SQDMULH, SRSHL

PiperOrigin-RevId: 385845074
diff --git a/BUILD.bazel b/BUILD.bazel
index 20ee0f5..bab7b79 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -4603,6 +4603,7 @@
     "src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S",
     "src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S",
     "src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-ld128.S",
+    "src/qs8-gemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld128.S",
     "src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-cortex-a53.S",
     "src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm-cortex-a53.S",
     "src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm.S",
@@ -4631,6 +4632,7 @@
     "src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S",
     "src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S",
     "src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-ld128.S",
+    "src/qs8-igemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld128.S",
 ]
 
 INTERNAL_MICROKERNEL_HDRS = [