Move generated micro-kernels into a subdirectory

PiperOrigin-RevId: 282322486
diff --git a/scripts/generate-f16-gemm.sh b/scripts/generate-f16-gemm.sh
index 0a00557..5ea5ba5 100755
--- a/scripts/generate-f16-gemm.sh
+++ b/scripts/generate-f16-gemm.sh
@@ -6,10 +6,9 @@
 
 ########################## ARM NEON with FP16 compute #########################
 ### LD64 micro-kernels
-tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=4 -D NR=8 -o src/f16-gemm/4x8-neonfp16arith-ld64.c
-tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=6 -D NR=8 -o src/f16-gemm/6x8-neonfp16arith-ld64.c
-tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=8 -D NR=8 -o src/f16-gemm/8x8-neonfp16arith-ld64.c
-
+tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=4 -D NR=8 -o src/f16-gemm/gen/4x8-neonfp16arith-ld64.c
+tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=6 -D NR=8 -o src/f16-gemm/gen/6x8-neonfp16arith-ld64.c
+tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=8 -D NR=8 -o src/f16-gemm/gen/8x8-neonfp16arith-ld64.c
 
 ################################## Unit tests #################################
 tools/generate-gemm-test.py --spec test/f16-gemm.yaml --output test/f16-gemm.cc