4x16c4 RNDNU quantized Neon assembly GEMM/IGEMM microkernel.
- Was gemmlowp with BIC, SQRDMULH, SSRA, SRSHL
- Now rndnu with SSHL, SQDMULH, SRSHL

PiperOrigin-RevId: 385845074
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 49f5913..90395eb 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3769,6 +3769,7 @@
   src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
   src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
   src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-ld128.S
+  src/qs8-gemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld128.S
   src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-cortex-a53.S
   src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm-cortex-a53.S
   src/qs8-igemm/gen/1x8c8-minmax-fp32-aarch64-neon-mlal-padal-prfm.S
@@ -3796,7 +3797,8 @@
   src/qs8-igemm/gen/4x16c4-minmax-fp32-aarch64-neondot-ld128.S
   src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S
   src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
-  src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-ld128.S)
+  src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-aarch64-neondot-ld128.S
+  src/qs8-igemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld128.S)
 
 SET(XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SCALAR_MICROKERNEL_SRCS})
 IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]" OR IOS_ARCH MATCHES "^armv7")