Refactor and open-source Three-Pass Softmax micro-kernels
- RAddExpMinusMax micro-kernel (AVX2 and AVX512F)
- RAddStoreExpMinusMax micro-kernel (AVX2 and AVX512F)
- VScaleExpMinusMax micro-kernel (AVX2 and AVX512F)
- Unit tests for all micro-kernels
PiperOrigin-RevId: 275570264
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 480380a..0a631cd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -486,13 +486,19 @@
src/f32-rmax/avx.c)
SET(XNNPACK_AVX2_MICROKERNEL_SRCS
+ src/f32-raddexpminusmax/avx2-p5-unroll64.c
+ src/f32-raddstoreexpminusmax/avx2-p5-unroll64.c
+ src/f32-vscaleexpminusmax/avx2-p5-unroll64.c
src/math/exp-avx2-p5.c
src/math/exp-avx2-perm-p3.c
src/math/exp-avx2-perm-p4.c
src/math/expminus-avx2-p5.c)
SET(XNNPACK_AVX512F_MICROKERNEL_SRCS
+ src/f32-raddexpminusmax/avx512f-p5-scalef-unroll128.c
+ src/f32-raddstoreexpminusmax/avx512f-p5-scalef-unroll128.c
src/f32-rmax/avx512f.c
+ src/f32-vscaleexpminusmax/avx512f-p5-scalef-unroll128.c
src/math/exp-avx512f-p5-scalef.c
src/math/exp-avx512f-p5.c
src/math/exp-avx512f-perm-p3.c)
@@ -1030,6 +1036,24 @@
TARGET_LINK_LIBRARIES(f32-prelu-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
ADD_TEST(f32-prelu-test f32-prelu-test)
+ ADD_EXECUTABLE(f32-raddexpminusmax-test test/f32-raddexpminusmax.cc)
+ SET_TARGET_PROPERTIES(f32-raddexpminusmax-test PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS YES)
+ TARGET_INCLUDE_DIRECTORIES(f32-raddexpminusmax-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(f32-raddexpminusmax-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(f32-raddexpminusmax-test f32-raddexpminusmax-test)
+
+ ADD_EXECUTABLE(f32-raddstoreexpminusmax-test test/f32-raddstoreexpminusmax.cc)
+ SET_TARGET_PROPERTIES(f32-raddstoreexpminusmax-test PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS YES)
+ TARGET_INCLUDE_DIRECTORIES(f32-raddstoreexpminusmax-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(f32-raddstoreexpminusmax-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(f32-raddstoreexpminusmax-test f32-raddstoreexpminusmax-test)
+
ADD_EXECUTABLE(f32-rmax-test test/f32-rmax.cc)
SET_TARGET_PROPERTIES(f32-rmax-test PROPERTIES
CXX_STANDARD 11
@@ -1075,6 +1099,15 @@
TARGET_LINK_LIBRARIES(f32-vmulcaddc-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
ADD_TEST(f32-vmulcaddc-test f32-vmulcaddc-test)
+ ADD_EXECUTABLE(f32-vscaleexpminusmax-test test/f32-vscaleexpminusmax.cc)
+ SET_TARGET_PROPERTIES(f32-vscaleexpminusmax-test PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS YES)
+ TARGET_INCLUDE_DIRECTORIES(f32-vscaleexpminusmax-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(f32-vscaleexpminusmax-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(f32-vscaleexpminusmax-test f32-vscaleexpminusmax-test)
+
ADD_EXECUTABLE(f32-vsub-test test/f32-vsub.cc)
SET_TARGET_PROPERTIES(f32-vsub-test PROPERTIES
CXX_STANDARD 11