QS8 DWCONV microkernels with RNDNU requantization
Enable RNDNU-requantized DWCONV microkernels on AArch32 for a minor performance improvement on Pixel 2:
- QS8 MobileNet v1: 73234 us -> 72757 us
- QS8 MobileNet v2: 51472 us -> 51203 us
PiperOrigin-RevId: 385261813
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e7a9a9c..0d9a661 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1119,12 +1119,16 @@
src/qc8-igemm/gen/4x16-minmax-fp32-neon-mlal-lane.c
src/qs8-dwconv/gen/up8x9-minmax-fp32-neon-mul16.c
src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-neon-mul16.c
+ src/qs8-dwconv/gen/up8x9-minmax-rndnu-neon-mul16.c
src/qs8-dwconv/gen/up8x25-minmax-fp32-neon-mul16.c
src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-neon-mul16.c
+ src/qs8-dwconv/gen/up8x25-minmax-rndnu-neon-mul16.c
src/qs8-dwconv/gen/up16x9-minmax-fp32-neon-mul16.c
src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-neon-mul16.c
+ src/qs8-dwconv/gen/up16x9-minmax-rndnu-neon-mul16.c
src/qs8-dwconv/gen/up16x25-minmax-fp32-neon-mul16.c
src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-neon-mul16.c
+ src/qs8-dwconv/gen/up16x25-minmax-rndnu-neon-mul16.c
src/qs8-dwconv/gen/up24x9-minmax-fp32-neon-mul16.c
src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-neon-mul16.c
src/qs8-dwconv/gen/up24x25-minmax-fp32-neon-mul16.c
@@ -5382,6 +5386,15 @@
TARGET_LINK_LIBRARIES(qc8-igemm-minmax-fp32-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
ADD_TEST(qc8-igemm-minmax-fp32-test qc8-igemm-minmax-fp32-test)
+ ADD_EXECUTABLE(qs8-dwconv-minmax-fp32-test test/qs8-dwconv-minmax-fp32.cc)
+ SET_TARGET_PROPERTIES(qs8-dwconv-minmax-fp32-test PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS YES)
+ TARGET_INCLUDE_DIRECTORIES(qs8-dwconv-minmax-fp32-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(qs8-dwconv-minmax-fp32-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(qs8-dwconv-minmax-fp32-test qs8-dwconv-minmax-fp32-test)
+
ADD_EXECUTABLE(qs8-dwconv-minmax-gemmlowp-test test/qs8-dwconv-minmax-gemmlowp.cc)
SET_TARGET_PROPERTIES(qs8-dwconv-minmax-gemmlowp-test PROPERTIES
CXX_STANDARD 11
@@ -5391,14 +5404,14 @@
TARGET_LINK_LIBRARIES(qs8-dwconv-minmax-gemmlowp-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
ADD_TEST(qs8-dwconv-minmax-gemmlowp-test qs8-dwconv-minmax-gemmlowp-test)
- ADD_EXECUTABLE(qs8-dwconv-minmax-fp32-test test/qs8-dwconv-minmax-fp32.cc)
- SET_TARGET_PROPERTIES(qs8-dwconv-minmax-fp32-test PROPERTIES
+ ADD_EXECUTABLE(qs8-dwconv-minmax-rndnu-test test/qs8-dwconv-minmax-rndnu.cc)
+ SET_TARGET_PROPERTIES(qs8-dwconv-minmax-rndnu-test PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS YES)
- TARGET_INCLUDE_DIRECTORIES(qs8-dwconv-minmax-fp32-test PRIVATE src test)
- TARGET_LINK_LIBRARIES(qs8-dwconv-minmax-fp32-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
- ADD_TEST(qs8-dwconv-minmax-fp32-test qs8-dwconv-minmax-fp32-test)
+ TARGET_INCLUDE_DIRECTORIES(qs8-dwconv-minmax-rndnu-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(qs8-dwconv-minmax-rndnu-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(qs8-dwconv-minmax-rndnu-test qs8-dwconv-minmax-rndnu-test)
ADD_EXECUTABLE(qs8-gavgpool-minmax-test test/qs8-gavgpool-minmax.cc)
SET_TARGET_PROPERTIES(qs8-gavgpool-minmax-test PROPERTIES