FMA3 microkernels with 4-wide shuffle

- Use the new microkernels by default on first-generation Zen

PiperOrigin-RevId: 284641473
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e7a0323..f33e55b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -812,6 +812,10 @@
   src/f32-gemm/gen/3x16-fma3-broadcast.c
   src/f32-gemm/gen/4x16-fma3-broadcast.c
   src/f32-gemm/gen/5x16-fma3-broadcast.c
+  src/f32-gemm/gen/1x16s4-fma3-broadcast.c
+  src/f32-gemm/gen/3x16s4-fma3-broadcast.c
+  src/f32-gemm/gen/4x16s4-fma3-broadcast.c
+  src/f32-gemm/gen/5x16s4-fma3-broadcast.c
   src/f32-gemm/gen-inc/1x8-fma3-broadcast.c
   src/f32-gemm/gen-inc/4x8-fma3-broadcast.c
   src/f32-gemm/gen-inc/5x8-fma3-broadcast.c
@@ -822,6 +826,10 @@
   src/f32-gemm/gen-inc/3x16-fma3-broadcast.c
   src/f32-gemm/gen-inc/4x16-fma3-broadcast.c
   src/f32-gemm/gen-inc/5x16-fma3-broadcast.c
+  src/f32-gemm/gen-inc/1x16s4-fma3-broadcast.c
+  src/f32-gemm/gen-inc/3x16s4-fma3-broadcast.c
+  src/f32-gemm/gen-inc/4x16s4-fma3-broadcast.c
+  src/f32-gemm/gen-inc/5x16s4-fma3-broadcast.c
   src/f32-igemm/gen/1x8-fma3-broadcast.c
   src/f32-igemm/gen/4x8-fma3-broadcast.c
   src/f32-igemm/gen/5x8-fma3-broadcast.c
@@ -831,7 +839,11 @@
   src/f32-igemm/gen/1x16-fma3-broadcast.c
   src/f32-igemm/gen/3x16-fma3-broadcast.c
   src/f32-igemm/gen/4x16-fma3-broadcast.c
-  src/f32-igemm/gen/5x16-fma3-broadcast.c)
+  src/f32-igemm/gen/5x16-fma3-broadcast.c
+  src/f32-igemm/gen/1x16s4-fma3-broadcast.c
+  src/f32-igemm/gen/3x16s4-fma3-broadcast.c
+  src/f32-igemm/gen/4x16s4-fma3-broadcast.c
+  src/f32-igemm/gen/5x16s4-fma3-broadcast.c)
 
 SET(XNNPACK_AVX2_MICROKERNEL_SRCS
   src/f32-raddexpminusmax/gen/avx2-p5-x64.c