Enable QU8 AArch microkernels based on uarch

- Based on the initialization used for QS8, select the same microkernels for QU8 when available.
- The dot-product path now uses the 4x8 intrinsics microkernel (previously 2x16).

PiperOrigin-RevId: 426091572
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 37d088c..1d4fb89 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3282,30 +3282,30 @@
   src/f16-vmulcaddc/gen/c16-minmax-neonfp16arith-2x.c)
 
 SET(PROD_NEONDOT_MICROKERNEL_SRCS
-  src/qc8-gemm/gen/1x8c4-minmax-fp32-neondot.c
-  src/qc8-gemm/gen/1x16c4-minmax-fp32-neondot.c
-  src/qc8-gemm/gen/4x8c4-minmax-fp32-neondot.c
-  src/qc8-gemm/gen/4x16c4-minmax-fp32-neondot.c
-  src/qc8-igemm/gen/1x8c4-minmax-fp32-neondot.c
-  src/qc8-igemm/gen/1x16c4-minmax-fp32-neondot.c
-  src/qc8-igemm/gen/4x8c4-minmax-fp32-neondot.c
-  src/qc8-igemm/gen/4x16c4-minmax-fp32-neondot.c
-  src/qs8-gemm/gen/1x8c4-minmax-rndnu-neondot.c
-  src/qs8-gemm/gen/1x16c4-minmax-rndnu-neondot.c
-  src/qs8-gemm/gen/4x8c4-minmax-rndnu-neondot.c
-  src/qs8-gemm/gen/4x16c4-minmax-rndnu-neondot.c
-  src/qs8-igemm/gen/1x8c4-minmax-rndnu-neondot.c
-  src/qs8-igemm/gen/1x16c4-minmax-rndnu-neondot.c
-  src/qs8-igemm/gen/4x8c4-minmax-rndnu-neondot.c
-  src/qs8-igemm/gen/4x16c4-minmax-rndnu-neondot.c
-  src/qu8-gemm/gen/1x8c4-minmax-rndnu-neondot.c
-  src/qu8-gemm/gen/1x16c4-minmax-rndnu-neondot.c
-  src/qu8-gemm/gen/2x16c4-minmax-rndnu-neondot.c
-  src/qu8-gemm/gen/4x16c4-minmax-rndnu-neondot.c
-  src/qu8-igemm/gen/1x8c4-minmax-rndnu-neondot.c
-  src/qu8-igemm/gen/1x16c4-minmax-rndnu-neondot.c
-  src/qu8-igemm/gen/2x16c4-minmax-rndnu-neondot.c
-  src/qu8-igemm/gen/4x16c4-minmax-rndnu-neondot.c)
+   src/qc8-gemm/gen/1x8c4-minmax-fp32-neondot.c
+   src/qc8-gemm/gen/1x16c4-minmax-fp32-neondot.c
+   src/qc8-gemm/gen/4x8c4-minmax-fp32-neondot.c
+   src/qc8-gemm/gen/4x16c4-minmax-fp32-neondot.c
+   src/qc8-igemm/gen/1x8c4-minmax-fp32-neondot.c
+   src/qc8-igemm/gen/1x16c4-minmax-fp32-neondot.c
+   src/qc8-igemm/gen/4x8c4-minmax-fp32-neondot.c
+   src/qc8-igemm/gen/4x16c4-minmax-fp32-neondot.c
+   src/qs8-gemm/gen/1x8c4-minmax-rndnu-neondot.c
+   src/qs8-gemm/gen/1x16c4-minmax-rndnu-neondot.c
+   src/qs8-gemm/gen/4x8c4-minmax-rndnu-neondot.c
+   src/qs8-gemm/gen/4x16c4-minmax-rndnu-neondot.c
+   src/qs8-igemm/gen/1x8c4-minmax-rndnu-neondot.c
+   src/qs8-igemm/gen/1x16c4-minmax-rndnu-neondot.c
+   src/qs8-igemm/gen/4x8c4-minmax-rndnu-neondot.c
+   src/qs8-igemm/gen/4x16c4-minmax-rndnu-neondot.c
+   src/qu8-gemm/gen/1x8c4-minmax-rndnu-neondot.c
+   src/qu8-gemm/gen/1x16c4-minmax-rndnu-neondot.c
+   src/qu8-gemm/gen/4x8c4-minmax-rndnu-neondot.c
+   src/qu8-gemm/gen/4x16c4-minmax-rndnu-neondot.c
+   src/qu8-igemm/gen/1x8c4-minmax-rndnu-neondot.c
+   src/qu8-igemm/gen/1x16c4-minmax-rndnu-neondot.c
+   src/qu8-igemm/gen/4x8c4-minmax-rndnu-neondot.c
+   src/qu8-igemm/gen/4x16c4-minmax-rndnu-neondot.c)
 
 SET(ALL_NEONDOT_MICROKERNEL_SRCS
   src/qc8-gemm/gen/1x8c4-minmax-fp32-neondot.c