Add AVX and FMA3 microkernels for F32 GEMM/GEMMINC/IGEMM
PiperOrigin-RevId: 281807374
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5561c58..8526346 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -634,9 +634,44 @@
src/f32-prelu/sse41-2x8.c)
SET(XNNPACK_AVX_MICROKERNEL_SRCS
+ src/f32-gemm/1x8-avx-broadcast.c
+ src/f32-gemm/4x8-avx-broadcast.c
+ src/f32-gemm/5x8-avx-broadcast.c
+ src/f32-gemm/6x8-avx-broadcast.c
+ src/f32-gemm/7x8-avx-broadcast.c
+ src/f32-gemminc/1x8-avx-broadcast.c
+ src/f32-gemminc/4x8-avx-broadcast.c
+ src/f32-gemminc/5x8-avx-broadcast.c
+ src/f32-gemminc/6x8-avx-broadcast.c
+ src/f32-gemminc/7x8-avx-broadcast.c
+ src/f32-igemm/1x8-avx-broadcast.c
+ src/f32-igemm/4x8-avx-broadcast.c
+ src/f32-igemm/5x8-avx-broadcast.c
+ src/f32-igemm/6x8-avx-broadcast.c
+ src/f32-igemm/7x8-avx-broadcast.c
src/f32-rmax/avx.c
src/f32-vscale/avx-unroll32.c)
+SET(XNNPACK_FMA3_MICROKERNEL_SRCS
+ src/f32-gemm/1x8-fma3-broadcast.c
+ src/f32-gemm/4x8-fma3-broadcast.c
+ src/f32-gemm/5x8-fma3-broadcast.c
+ src/f32-gemm/6x8-fma3-broadcast.c
+ src/f32-gemm/7x8-fma3-broadcast.c
+ src/f32-gemm/8x8-fma3-broadcast.c
+ src/f32-gemminc/1x8-fma3-broadcast.c
+ src/f32-gemminc/4x8-fma3-broadcast.c
+ src/f32-gemminc/5x8-fma3-broadcast.c
+ src/f32-gemminc/6x8-fma3-broadcast.c
+ src/f32-gemminc/7x8-fma3-broadcast.c
+ src/f32-gemminc/8x8-fma3-broadcast.c
+ src/f32-igemm/1x8-fma3-broadcast.c
+ src/f32-igemm/4x8-fma3-broadcast.c
+ src/f32-igemm/5x8-fma3-broadcast.c
+ src/f32-igemm/6x8-fma3-broadcast.c
+ src/f32-igemm/7x8-fma3-broadcast.c
+ src/f32-igemm/8x8-fma3-broadcast.c)
+
SET(XNNPACK_AVX2_MICROKERNEL_SRCS
src/f32-raddexpminusmax/avx2-p5-unroll64.c
src/f32-raddextexp/avx2-p5-unroll64.c
@@ -736,6 +771,7 @@
LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SSE2_MICROKERNEL_SRCS})
LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SSE41_MICROKERNEL_SRCS})
LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_AVX_MICROKERNEL_SRCS})
+ LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_FMA3_MICROKERNEL_SRCS})
LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_AVX2_MICROKERNEL_SRCS})
LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_AVX512F_MICROKERNEL_SRCS})
ENDIF()
@@ -772,6 +808,7 @@
SET_PROPERTY(SOURCE ${XNNPACK_SSE2_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -msse2 ")
SET_PROPERTY(SOURCE ${XNNPACK_SSE41_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -msse4.1 ")
SET_PROPERTY(SOURCE ${XNNPACK_AVX_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -mavx ")
+ SET_PROPERTY(SOURCE ${XNNPACK_FMA3_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -mfma ")
SET_PROPERTY(SOURCE ${XNNPACK_AVX2_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -mfma -mavx2 ")
SET_PROPERTY(SOURCE ${XNNPACK_AVX512F_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -mavx512f ")
ENDIF()