FMA3 microkernels with 4-wide shuffle

- Use the new microkernels by default on first-generation Zen

PiperOrigin-RevId: 284641473
diff --git a/scripts/generate-f32-igemm.sh b/scripts/generate-f32-igemm.sh
index fd5da68..ffdfd6f 100755
--- a/scripts/generate-f32-igemm.sh
+++ b/scripts/generate-f32-igemm.sh
@@ -109,6 +109,11 @@
 tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16-fma3-broadcast.c
 tools/xngen src/f32-igemm/avx-broadcast.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16-fma3-broadcast.c
 
+tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=1 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/1x16s4-fma3.c
+tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=3 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/3x16s4-fma3.c
+tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=4 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/4x16s4-fma3.c
+tools/xngen src/f32-igemm/avx-shuffle4.c.in -D MR=5 -D NR=16 -D FMA=3 -o src/f32-igemm/gen/5x16s4-fma3.c
+
 ################################# x86 AVX-512 #################################
 ### AVX512F+BROADCAST micro-kernels
 tools/xngen src/f32-igemm/avx512-broadcast.c.in -D MR=1 -D NR=16 -o src/f32-igemm/gen/1x16-avx512f-broadcast.c