Support QC8 DWCONV microkernels
- Add a minimal set of 8-bit fixed-point DWCONV microkernels with per-channel
quantization (QC8), optimized for AVX2.
- Extend packing functions to allow extra space after the kernel data.
Per-channel quantization parameters are later packed into that space.
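- Extend DWConvMicrokernelTester to support unit testing of QC8 DWCONV.
- In the CMake AVX2 source list, replace the qs8-igemm 1x8c8/2x8c8/3x8c8
fp32 entries that follow the qc8-gemm sources with the matching qc8-igemm ones.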
PiperOrigin-RevId: 377620717
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9ce36a3..1c71c90 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2883,15 +2883,27 @@
src/math/sigmoid-avx2-rr2-p5-div.c
src/math/sigmoid-avx2-rr2-p5-nr1fma.c
src/math/sigmoid-avx2-rr2-p5-nr2fma.c
+ src/qc8-dwconv/gen/up8x9-minmax-fp32-avx2-mul32.c
+ src/qc8-dwconv/gen/up8x25-minmax-fp32-avx2-mul32.c
+ src/qc8-dwconv/gen/up16x9-minmax-fp32-avx2-mul16.c
+ src/qc8-dwconv/gen/up16x9-minmax-fp32-avx2-mul32.c
+ src/qc8-dwconv/gen/up16x25-minmax-fp32-avx2-mul16.c
+ src/qc8-dwconv/gen/up16x25-minmax-fp32-avx2-mul32.c
+ src/qc8-dwconv/gen/up24x9-minmax-fp32-avx2-mul32.c
+ src/qc8-dwconv/gen/up24x25-minmax-fp32-avx2-mul32.c
+ src/qc8-dwconv/gen/up32x9-minmax-fp32-avx2-mul16.c
+ src/qc8-dwconv/gen/up32x9-minmax-fp32-avx2-mul32.c
+ src/qc8-dwconv/gen/up32x25-minmax-fp32-avx2-mul16.c
+ src/qc8-dwconv/gen/up32x25-minmax-fp32-avx2-mul32.c
src/qc8-gemm/gen/1x8c8-minmax-fp32-avx2.c
src/qc8-gemm/gen/1x8c8-xw-minmax-fp32-avx2.c
src/qc8-gemm/gen/2x8c8-minmax-fp32-avx2.c
src/qc8-gemm/gen/2x8c8-xw-minmax-fp32-avx2.c
src/qc8-gemm/gen/3x8c8-minmax-fp32-avx2.c
src/qc8-gemm/gen/3x8c8-xw-minmax-fp32-avx2.c
- src/qs8-igemm/gen/1x8c8-minmax-fp32-avx2.c
- src/qs8-igemm/gen/2x8c8-minmax-fp32-avx2.c
- src/qs8-igemm/gen/3x8c8-minmax-fp32-avx2.c
+ src/qc8-igemm/gen/1x8c8-minmax-fp32-avx2.c
+ src/qc8-igemm/gen/2x8c8-minmax-fp32-avx2.c
+ src/qc8-igemm/gen/3x8c8-minmax-fp32-avx2.c
src/qs8-dwconv/gen/up8x9-minmax-fp32-avx2-mul32.c
src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c
src/qs8-dwconv/gen/up8x25-minmax-fp32-avx2-mul32.c
@@ -4942,6 +4954,15 @@
TARGET_LINK_LIBRARIES(f32-vrsubc-relu-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
ADD_TEST(f32-vrsubc-relu-test f32-vrsubc-relu-test)
+ ADD_EXECUTABLE(qc8-dwconv-minmax-fp32-test test/qc8-dwconv-minmax-fp32.cc)
+ SET_TARGET_PROPERTIES(qc8-dwconv-minmax-fp32-test PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS YES)
+ TARGET_INCLUDE_DIRECTORIES(qc8-dwconv-minmax-fp32-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(qc8-dwconv-minmax-fp32-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(qc8-dwconv-minmax-fp32-test qc8-dwconv-minmax-fp32-test)
+
ADD_EXECUTABLE(qc8-gemm-minmax-fp32-test test/qc8-gemm-minmax-fp32.cc)
SET_TARGET_PROPERTIES(qc8-gemm-minmax-fp32-test PROPERTIES
CXX_STANDARD 11