Add NR=16 GEMM and IGEMM micro-kernels with AVX and FMA3 implementations

PiperOrigin-RevId: 284464344
diff --git a/BUILD.bazel b/BUILD.bazel
index 676c071..1421c6a 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -766,16 +766,28 @@
     "src/f32-gemm/gen/5x8-avx-broadcast.c",
     "src/f32-gemm/gen/6x8-avx-broadcast.c",
     "src/f32-gemm/gen/7x8-avx-broadcast.c",
+    "src/f32-gemm/gen/1x16-avx-broadcast.c",
+    "src/f32-gemm/gen/3x16-avx-broadcast.c",
+    "src/f32-gemm/gen/4x16-avx-broadcast.c",
+    "src/f32-gemm/gen/5x16-avx-broadcast.c",
     "src/f32-gemm/gen-inc/1x8-avx-broadcast.c",
     "src/f32-gemm/gen-inc/4x8-avx-broadcast.c",
     "src/f32-gemm/gen-inc/5x8-avx-broadcast.c",
     "src/f32-gemm/gen-inc/6x8-avx-broadcast.c",
     "src/f32-gemm/gen-inc/7x8-avx-broadcast.c",
+    "src/f32-gemm/gen-inc/1x16-avx-broadcast.c",
+    "src/f32-gemm/gen-inc/3x16-avx-broadcast.c",
+    "src/f32-gemm/gen-inc/4x16-avx-broadcast.c",
+    "src/f32-gemm/gen-inc/5x16-avx-broadcast.c",
     "src/f32-igemm/gen/1x8-avx-broadcast.c",
     "src/f32-igemm/gen/4x8-avx-broadcast.c",
     "src/f32-igemm/gen/5x8-avx-broadcast.c",
     "src/f32-igemm/gen/6x8-avx-broadcast.c",
     "src/f32-igemm/gen/7x8-avx-broadcast.c",
+    "src/f32-igemm/gen/1x16-avx-broadcast.c",
+    "src/f32-igemm/gen/3x16-avx-broadcast.c",
+    "src/f32-igemm/gen/4x16-avx-broadcast.c",
+    "src/f32-igemm/gen/5x16-avx-broadcast.c",
     "src/f32-rmax/avx.c",
     "src/f32-vscale/avx-unroll32.c",
 ]
@@ -799,18 +811,30 @@
     "src/f32-gemm/gen/6x8-fma3-broadcast.c",
     "src/f32-gemm/gen/7x8-fma3-broadcast.c",
     "src/f32-gemm/gen/8x8-fma3-broadcast.c",
+    "src/f32-gemm/gen/1x16-fma3-broadcast.c",
+    "src/f32-gemm/gen/3x16-fma3-broadcast.c",
+    "src/f32-gemm/gen/4x16-fma3-broadcast.c",
+    "src/f32-gemm/gen/5x16-fma3-broadcast.c",
     "src/f32-gemm/gen-inc/1x8-fma3-broadcast.c",
     "src/f32-gemm/gen-inc/4x8-fma3-broadcast.c",
     "src/f32-gemm/gen-inc/5x8-fma3-broadcast.c",
     "src/f32-gemm/gen-inc/6x8-fma3-broadcast.c",
     "src/f32-gemm/gen-inc/7x8-fma3-broadcast.c",
     "src/f32-gemm/gen-inc/8x8-fma3-broadcast.c",
+    "src/f32-gemm/gen-inc/1x16-fma3-broadcast.c",
+    "src/f32-gemm/gen-inc/3x16-fma3-broadcast.c",
+    "src/f32-gemm/gen-inc/4x16-fma3-broadcast.c",
+    "src/f32-gemm/gen-inc/5x16-fma3-broadcast.c",
     "src/f32-igemm/gen/1x8-fma3-broadcast.c",
     "src/f32-igemm/gen/4x8-fma3-broadcast.c",
     "src/f32-igemm/gen/5x8-fma3-broadcast.c",
     "src/f32-igemm/gen/6x8-fma3-broadcast.c",
     "src/f32-igemm/gen/7x8-fma3-broadcast.c",
     "src/f32-igemm/gen/8x8-fma3-broadcast.c",
+    "src/f32-igemm/gen/1x16-fma3-broadcast.c",
+    "src/f32-igemm/gen/3x16-fma3-broadcast.c",
+    "src/f32-igemm/gen/4x16-fma3-broadcast.c",
+    "src/f32-igemm/gen/5x16-fma3-broadcast.c",
 ]
 
 AVX2_UKERNELS = [