Add Nx16 FP16 intrinsic GEMM and IGEMM microkernels (ukernels)
IGEMM performance on mobilenet_v2, measured on Pixel 4:
f16_igemm_6x16__neonfp16arith_ld64 8986179
f16_igemm_4x16__neonfp16arith_ld64 9969704
f16_igemm_8x16__neonfp16arith_ld64 10405198
f16_igemm_1x16__neonfp16arith_ld64 11418367
f16_igemm_6x8__neonfp16arith_ld64 11488318
f16_igemm_4x8__neonfp16arith_ld64 11566898
f16_igemm_8x8__neonfp16arith_ld64 11992713
f16_igemm_1x8__neonfp16arith_ld64 24999989
GEMM performance on mobilenet_v2, measured on Pixel 4:
f16_gemm_6x8__aarch64_neonfp16arith_ld64 5026706
f16_gemm_6x16__aarch64_neonfp16arith_ld32 5067141
f16_gemm_4x16__aarch64_neonfp16arith_ld32 5083430
f16_gemm_8x8__aarch64_neonfp16arith_ld64 5157512
f16_gemm_4x8__aarch64_neonfp16arith_ld64 5215077
f16_gemm_6x16__neonfp16arith_ld64 8605063
f16_gemm_4x16__neonfp16arith_ld64 8856811
f16_gemm_1x16__aarch64_neonfp16arith_ld32 9828810
f16_gemm_1x16__neonfp16arith_ld64 10187374
f16_gemm_1x8__aarch64_neonfp16arith_ld64 10449552
f16_gemm_4x8__neonfp16arith_ld64 10537397
f16_gemm_6x8__neonfp16arith_ld64 10797656
f16_gemm_8x8__neonfp16arith_ld64 11174740
f16_gemm_8x16__neonfp16arith_ld64 12283543
f16_gemm_1x8__neonfp16arith_ld64 16208999
PiperOrigin-RevId: 310106462
diff --git a/CMakeLists.txt b/CMakeLists.txt
index eb4e630..40a04e2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -921,6 +921,18 @@
src/f16-igemm/gen/4x8-minmax-neonfp16arith-ld64.c
src/f16-igemm/gen/6x8-minmax-neonfp16arith-ld64.c
src/f16-igemm/gen/8x8-minmax-neonfp16arith-ld64.c
+ src/f16-gemm/gen/1x16-minmax-neonfp16arith-ld64.c
+ src/f16-gemm/gen/4x16-minmax-neonfp16arith-ld64.c
+ src/f16-gemm/gen/6x16-minmax-neonfp16arith-ld64.c
+ src/f16-gemm/gen/8x16-minmax-neonfp16arith-ld64.c
+ src/f16-gemm/gen-inc/1x16inc-minmax-neonfp16arith-ld64.c
+ src/f16-gemm/gen-inc/4x16inc-minmax-neonfp16arith-ld64.c
+ src/f16-gemm/gen-inc/6x16inc-minmax-neonfp16arith-ld64.c
+ src/f16-gemm/gen-inc/8x16inc-minmax-neonfp16arith-ld64.c
+ src/f16-igemm/gen/1x16-minmax-neonfp16arith-ld64.c
+ src/f16-igemm/gen/4x16-minmax-neonfp16arith-ld64.c
+ src/f16-igemm/gen/6x16-minmax-neonfp16arith-ld64.c
+ src/f16-igemm/gen/8x16-minmax-neonfp16arith-ld64.c
src/f16-spmm/gen/8x1-minmax-neonfp16arith.c
src/f16-spmm/gen/8x1-minmax-neonfp16arith-unroll2.c
src/f16-spmm/gen/16x1-minmax-neonfp16arith.c