Support QC8 DWCONV microkernels

- Add a minimal set of 8-bit fixed-point microkernels with per-channel
  quantization (QC8), optimized for AVX2.
- Extend the packing functions to allow extra space after the kernel data;
  the per-channel quantization parameters are later packed into that space.
- Extend DWConvMicrokernelTester to support unit testing of QC8 DWCONV.

PiperOrigin-RevId: 377620717
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9ce36a3..1c71c90 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2883,15 +2883,27 @@
   src/math/sigmoid-avx2-rr2-p5-div.c
   src/math/sigmoid-avx2-rr2-p5-nr1fma.c
   src/math/sigmoid-avx2-rr2-p5-nr2fma.c
+  src/qc8-dwconv/gen/up8x9-minmax-fp32-avx2-mul32.c
+  src/qc8-dwconv/gen/up8x25-minmax-fp32-avx2-mul32.c
+  src/qc8-dwconv/gen/up16x9-minmax-fp32-avx2-mul16.c
+  src/qc8-dwconv/gen/up16x9-minmax-fp32-avx2-mul32.c
+  src/qc8-dwconv/gen/up16x25-minmax-fp32-avx2-mul16.c
+  src/qc8-dwconv/gen/up16x25-minmax-fp32-avx2-mul32.c
+  src/qc8-dwconv/gen/up24x9-minmax-fp32-avx2-mul32.c
+  src/qc8-dwconv/gen/up24x25-minmax-fp32-avx2-mul32.c
+  src/qc8-dwconv/gen/up32x9-minmax-fp32-avx2-mul16.c
+  src/qc8-dwconv/gen/up32x9-minmax-fp32-avx2-mul32.c
+  src/qc8-dwconv/gen/up32x25-minmax-fp32-avx2-mul16.c
+  src/qc8-dwconv/gen/up32x25-minmax-fp32-avx2-mul32.c
   src/qc8-gemm/gen/1x8c8-minmax-fp32-avx2.c
   src/qc8-gemm/gen/1x8c8-xw-minmax-fp32-avx2.c
   src/qc8-gemm/gen/2x8c8-minmax-fp32-avx2.c
   src/qc8-gemm/gen/2x8c8-xw-minmax-fp32-avx2.c
   src/qc8-gemm/gen/3x8c8-minmax-fp32-avx2.c
   src/qc8-gemm/gen/3x8c8-xw-minmax-fp32-avx2.c
-  src/qs8-igemm/gen/1x8c8-minmax-fp32-avx2.c
-  src/qs8-igemm/gen/2x8c8-minmax-fp32-avx2.c
-  src/qs8-igemm/gen/3x8c8-minmax-fp32-avx2.c
+  src/qc8-igemm/gen/1x8c8-minmax-fp32-avx2.c
+  src/qc8-igemm/gen/2x8c8-minmax-fp32-avx2.c
+  src/qc8-igemm/gen/3x8c8-minmax-fp32-avx2.c
   src/qs8-dwconv/gen/up8x9-minmax-fp32-avx2-mul32.c
   src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c
   src/qs8-dwconv/gen/up8x25-minmax-fp32-avx2-mul32.c
@@ -4942,6 +4954,15 @@
   TARGET_LINK_LIBRARIES(f32-vrsubc-relu-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
   ADD_TEST(f32-vrsubc-relu-test f32-vrsubc-relu-test)
 
+  ADD_EXECUTABLE(qc8-dwconv-minmax-fp32-test test/qc8-dwconv-minmax-fp32.cc)
+  SET_TARGET_PROPERTIES(qc8-dwconv-minmax-fp32-test PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED YES
+    CXX_EXTENSIONS YES)
+  TARGET_INCLUDE_DIRECTORIES(qc8-dwconv-minmax-fp32-test PRIVATE src test)
+  TARGET_LINK_LIBRARIES(qc8-dwconv-minmax-fp32-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+  ADD_TEST(qc8-dwconv-minmax-fp32-test qc8-dwconv-minmax-fp32-test)
+
   ADD_EXECUTABLE(qc8-gemm-minmax-fp32-test test/qc8-gemm-minmax-fp32.cc)
   SET_TARGET_PROPERTIES(qc8-gemm-minmax-fp32-test PROPERTIES
     CXX_STANDARD 11