QU8 C4 NEON Dot Product GEMM/IGEMM microkernels

- 2 dot products per vector: A * W and A * zero_point.
- Unsigned dot products with 2 sets of accumulators, one per product.
- Subtract the zero-point accumulators from the A * W accumulators outside the loop.
- 1x8, 4x8, 6x8, 8x8, 1x16, 4x16, 6x16, 8x16 GEMM and IGEMM.

PiperOrigin-RevId: 390067497
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3dd4eba..911ea99 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2347,7 +2347,23 @@
   src/qs8-igemm/gen/6x8c4-minmax-rndnu-neondot.c
   src/qs8-igemm/gen/6x16c4-minmax-rndnu-neondot.c
   src/qs8-igemm/gen/8x8c4-minmax-rndnu-neondot.c
-  src/qs8-igemm/gen/8x16c4-minmax-rndnu-neondot.c)
+  src/qs8-igemm/gen/8x16c4-minmax-rndnu-neondot.c
+  src/qu8-gemm/gen/1x8c4-minmax-rndnu-neondot.c
+  src/qu8-gemm/gen/1x16c4-minmax-rndnu-neondot.c
+  src/qu8-gemm/gen/4x8c4-minmax-rndnu-neondot.c
+  src/qu8-gemm/gen/4x16c4-minmax-rndnu-neondot.c
+  src/qu8-gemm/gen/6x8c4-minmax-rndnu-neondot.c
+  src/qu8-gemm/gen/6x16c4-minmax-rndnu-neondot.c
+  src/qu8-gemm/gen/8x8c4-minmax-rndnu-neondot.c
+  src/qu8-gemm/gen/8x16c4-minmax-rndnu-neondot.c
+  src/qu8-igemm/gen/1x8c4-minmax-rndnu-neondot.c
+  src/qu8-igemm/gen/1x16c4-minmax-rndnu-neondot.c
+  src/qu8-igemm/gen/4x8c4-minmax-rndnu-neondot.c
+  src/qu8-igemm/gen/4x16c4-minmax-rndnu-neondot.c
+  src/qu8-igemm/gen/6x8c4-minmax-rndnu-neondot.c
+  src/qu8-igemm/gen/6x16c4-minmax-rndnu-neondot.c
+  src/qu8-igemm/gen/8x8c4-minmax-rndnu-neondot.c
+  src/qu8-igemm/gen/8x16c4-minmax-rndnu-neondot.c)
 
 SET(PROD_SSE_MICROKERNEL_SRCS
   src/f32-avgpool/9p8x-minmax-sse-c4.c