Generalize PAD microkernels to all 8-/16-/32-bit data types
PiperOrigin-RevId: 389507611
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bd7accc..3dd4eba 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -421,15 +421,14 @@
src/x32-packx/x2-scalar.c
src/x32-packx/x3-scalar.c
src/x32-packx/x4-scalar.c
- src/x32-pad/scalar-float.c
- src/x32-pad/scalar-int.c
src/x32-unpool/scalar.c
src/x32-zip/x2-scalar.c
src/x32-zip/x3-scalar.c
src/x32-zip/x4-scalar.c
src/x32-zip/xm-scalar.c
src/xx-copy/memcpy.c
- src/xx-fill/scalar-x16.c)
+ src/xx-fill/scalar-x16.c
+ src/xx-pad/scalar.c)
SET(ALL_SCALAR_MICROKERNEL_SRCS
src/f32-argmaxpool/4x-scalar-c1.c
@@ -1031,15 +1030,14 @@
src/x32-packx/x2-scalar.c
src/x32-packx/x3-scalar.c
src/x32-packx/x4-scalar.c
- src/x32-pad/scalar-float.c
- src/x32-pad/scalar-int.c
src/x32-unpool/scalar.c
src/x32-zip/x2-scalar.c
src/x32-zip/x3-scalar.c
src/x32-zip/x4-scalar.c
src/x32-zip/xm-scalar.c
src/xx-copy/memcpy.c
- src/xx-fill/scalar-x16.c)
+ src/xx-fill/scalar-x16.c
+ src/xx-pad/scalar.c)
SET(PROD_NEON_MICROKERNEL_SRCS
src/f32-argmaxpool/4x-neon-c4.c
@@ -1147,13 +1145,13 @@
src/x8-zip/x4-neon.c
src/x8-zip/xm-neon.c
src/x32-packx/x4-neon-st4.c
- src/x32-pad/neon.c
src/x32-unpool/neon.c
src/x32-zip/x2-neon.c
src/x32-zip/x3-neon.c
src/x32-zip/x4-neon.c
src/x32-zip/xm-neon.c
- src/xx-fill/neon-x64.c)
+ src/xx-fill/neon-x64.c
+ src/xx-pad/neon.c)
SET(ALL_NEON_MICROKERNEL_SRCS
src/f32-argmaxpool/4x-neon-c4.c
@@ -1717,13 +1715,13 @@
src/x8-zip/x4-neon.c
src/x8-zip/xm-neon.c
src/x32-packx/x4-neon-st4.c
- src/x32-pad/neon.c
src/x32-unpool/neon.c
src/x32-zip/x2-neon.c
src/x32-zip/x3-neon.c
src/x32-zip/x4-neon.c
src/x32-zip/xm-neon.c
- src/xx-fill/neon-x64.c)
+ src/xx-fill/neon-x64.c
+ src/xx-pad/neon.c)
SET(PROD_NEONFMA_MICROKERNEL_SRCS
src/f32-dwconv/gen/up4x9-minmax-neonfma.c
@@ -2402,8 +2400,7 @@
src/f32-vunary/gen/vabs-sse-x8.c
src/f32-vunary/gen/vneg-sse-x8.c
src/f32-vunary/gen/vsqr-sse-x8.c
- src/x32-packx/x4-sse.c
- src/x32-pad/sse.c)
+ src/x32-packx/x4-sse.c)
SET(ALL_SSE_MICROKERNEL_SRCS
src/f32-avgpool/9p8x-minmax-sse-c4.c
@@ -2576,8 +2573,7 @@
src/math/sqrt-sse-hh1mac.c
src/math/sqrt-sse-nr1mac.c
src/math/sqrt-sse-nr2mac.c
- src/x32-packx/x4-sse.c
- src/x32-pad/sse.c)
+ src/x32-packx/x4-sse.c)
SET(PROD_SSE2_MICROKERNEL_SRCS
src/f32-argmaxpool/4x-sse2-c4.c
@@ -2636,7 +2632,8 @@
src/x32-zip/x3-sse2.c
src/x32-zip/x4-sse2.c
src/x32-zip/xm-sse2.c
- src/xx-fill/sse2-x64.c)
+ src/xx-fill/sse2-x64.c
+ src/xx-pad/sse2.c)
SET(ALL_SSE2_MICROKERNEL_SRCS
src/f32-argmaxpool/4x-sse2-c4.c
@@ -2894,7 +2891,8 @@
src/x32-zip/x3-sse2.c
src/x32-zip/x4-sse2.c
src/x32-zip/xm-sse2.c
- src/xx-fill/sse2-x64.c)
+ src/xx-fill/sse2-x64.c
+ src/xx-pad/sse2.c)
SET(PROD_SSSE3_MICROKERNEL_SRCS
src/f32-dwconv2d-chw/gen/3x3p1-minmax-ssse3-2x4-acc2.c
@@ -6718,15 +6716,6 @@
TARGET_LINK_LIBRARIES(x32-packx-test PRIVATE cpuinfo fp16 pthreadpool gtest gtest_main)
ADD_TEST(x32-packx-test x32-packx-test)
- ADD_EXECUTABLE(x32-pad-test test/x32-pad.cc $<TARGET_OBJECTS:all_microkernels>)
- SET_TARGET_PROPERTIES(x32-pad-test PROPERTIES
- CXX_STANDARD 11
- CXX_STANDARD_REQUIRED YES
- CXX_EXTENSIONS YES)
- TARGET_INCLUDE_DIRECTORIES(x32-pad-test PRIVATE include src test)
- TARGET_LINK_LIBRARIES(x32-pad-test PRIVATE cpuinfo fp16 pthreadpool gtest gtest_main)
- ADD_TEST(x32-pad-test x32-pad-test)
-
ADD_EXECUTABLE(x32-unpool-test test/x32-unpool.cc $<TARGET_OBJECTS:all_microkernels>)
SET_TARGET_PROPERTIES(x32-unpool-test PROPERTIES
CXX_STANDARD 11
@@ -6780,6 +6769,15 @@
TARGET_INCLUDE_DIRECTORIES(xx-fill-test PRIVATE include src test)
TARGET_LINK_LIBRARIES(xx-fill-test PRIVATE cpuinfo fp16 pthreadpool gtest gtest_main)
ADD_TEST(xx-fill-test xx-fill-test)
+
+ ADD_EXECUTABLE(xx-pad-test test/xx-pad.cc $<TARGET_OBJECTS:all_microkernels>)
+ SET_TARGET_PROPERTIES(xx-pad-test PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS YES)
+ TARGET_INCLUDE_DIRECTORIES(xx-pad-test PRIVATE include src test)
+ TARGET_LINK_LIBRARIES(xx-pad-test PRIVATE cpuinfo fp16 pthreadpool gtest gtest_main)
+ ADD_TEST(xx-pad-test xx-pad-test)
ENDIF()
# ---[ XNNPACK microbenchmarks