Nx16 FP16 intrinsic GEMM and IGEMM microkernels

IGEMM performance for mobilenet_v2 on Pixel 4:
f16_igemm_6x16__neonfp16arith_ld64                       8986179
f16_igemm_4x16__neonfp16arith_ld64                       9969704
f16_igemm_8x16__neonfp16arith_ld64                       10405198
f16_igemm_1x16__neonfp16arith_ld64                       11418367
f16_igemm_6x8__neonfp16arith_ld64                        11488318
f16_igemm_4x8__neonfp16arith_ld64                        11566898
f16_igemm_8x8__neonfp16arith_ld64                        11992713
f16_igemm_1x8__neonfp16arith_ld64                        24999989

GEMM performance for mobilenet_v2 on Pixel 4:
f16_gemm_6x8__aarch64_neonfp16arith_ld64                 5026706
f16_gemm_6x16__aarch64_neonfp16arith_ld32                5067141
f16_gemm_4x16__aarch64_neonfp16arith_ld32                5083430
f16_gemm_8x8__aarch64_neonfp16arith_ld64                 5157512
f16_gemm_4x8__aarch64_neonfp16arith_ld64                 5215077
f16_gemm_6x16__neonfp16arith_ld64                        8605063
f16_gemm_4x16__neonfp16arith_ld64                        8856811
f16_gemm_1x16__aarch64_neonfp16arith_ld32                9828810
f16_gemm_1x16__neonfp16arith_ld64                        10187374
f16_gemm_1x8__aarch64_neonfp16arith_ld64                 10449552
f16_gemm_4x8__neonfp16arith_ld64                         10537397
f16_gemm_6x8__neonfp16arith_ld64                         10797656
f16_gemm_8x8__neonfp16arith_ld64                         11174740
f16_gemm_8x16__neonfp16arith_ld64                        12283543
f16_gemm_1x8__neonfp16arith_ld64                         16208999

PiperOrigin-RevId: 310106462
diff --git a/CMakeLists.txt b/CMakeLists.txt
index eb4e630..40a04e2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -921,6 +921,18 @@
   src/f16-igemm/gen/4x8-minmax-neonfp16arith-ld64.c
   src/f16-igemm/gen/6x8-minmax-neonfp16arith-ld64.c
   src/f16-igemm/gen/8x8-minmax-neonfp16arith-ld64.c
+  src/f16-gemm/gen/1x16-minmax-neonfp16arith-ld64.c
+  src/f16-gemm/gen/4x16-minmax-neonfp16arith-ld64.c
+  src/f16-gemm/gen/6x16-minmax-neonfp16arith-ld64.c
+  src/f16-gemm/gen/8x16-minmax-neonfp16arith-ld64.c
+  src/f16-gemm/gen-inc/1x16inc-minmax-neonfp16arith-ld64.c
+  src/f16-gemm/gen-inc/4x16inc-minmax-neonfp16arith-ld64.c
+  src/f16-gemm/gen-inc/6x16inc-minmax-neonfp16arith-ld64.c
+  src/f16-gemm/gen-inc/8x16inc-minmax-neonfp16arith-ld64.c
+  src/f16-igemm/gen/1x16-minmax-neonfp16arith-ld64.c
+  src/f16-igemm/gen/4x16-minmax-neonfp16arith-ld64.c
+  src/f16-igemm/gen/6x16-minmax-neonfp16arith-ld64.c
+  src/f16-igemm/gen/8x16-minmax-neonfp16arith-ld64.c
   src/f16-spmm/gen/8x1-minmax-neonfp16arith.c
   src/f16-spmm/gen/8x1-minmax-neonfp16arith-unroll2.c
   src/f16-spmm/gen/16x1-minmax-neonfp16arith.c