Support FP32 requantization in AVX2 QS8 microkernels
PiperOrigin-RevId: 375822588
diff --git a/BUILD.bazel b/BUILD.bazel
index bd027df..64d6fc9 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -3359,26 +3359,44 @@
"src/math/sigmoid-avx2-rr2-p5-nr1fma.c",
"src/math/sigmoid-avx2-rr2-p5-nr2fma.c",
"src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c",
+ "src/qs8-dwconv/gen/up8x9-minmax-fp32-avx2-mul32.c",
"src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c",
+ "src/qs8-dwconv/gen/up8x25-minmax-fp32-avx2-mul32.c",
"src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c",
+ "src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul16.c",
"src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c",
+ "src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul32.c",
"src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c",
+ "src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul16.c",
"src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c",
+ "src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul32.c",
"src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c",
+ "src/qs8-dwconv/gen/up24x9-minmax-fp32-avx2-mul32.c",
"src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c",
+ "src/qs8-dwconv/gen/up24x25-minmax-fp32-avx2-mul32.c",
"src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c",
+ "src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul16.c",
"src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c",
+ "src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul32.c",
"src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c",
+ "src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul16.c",
"src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c",
+ "src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul32.c",
"src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c",
+ "src/qs8-gemm/gen/1x8c8-minmax-fp32-avx2.c",
"src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c",
"src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c",
+ "src/qs8-gemm/gen/2x8c8-minmax-fp32-avx2.c",
"src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c",
"src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c",
+ "src/qs8-gemm/gen/3x8c8-minmax-fp32-avx2.c",
"src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c",
"src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c",
+ "src/qs8-igemm/gen/1x8c8-minmax-fp32-avx2.c",
"src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c",
+ "src/qs8-igemm/gen/2x8c8-minmax-fp32-avx2.c",
"src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c",
+ "src/qs8-igemm/gen/3x8c8-minmax-fp32-avx2.c",
"src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x8.c",
"src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x16.c",
"src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x24.c",
@@ -6904,6 +6922,16 @@
)
xnnpack_unit_test(
+ name = "qs8_dwconv_minmax_fp32_test",
+ srcs = [
+ "test/qs8-dwconv-minmax-fp32.cc",
+ "test/dwconv-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS + [":packing"],
+)
+
+xnnpack_unit_test(
name = "qs8_gavgpool_minmax_test",
srcs = [
"test/qs8-gavgpool-minmax.cc",
@@ -6925,6 +6953,17 @@
)
xnnpack_unit_test(
+ name = "qs8_gemm_minmax_fp32_test",
+ timeout = "moderate",
+ srcs = [
+ "test/qs8-gemm-minmax-fp32.cc",
+ "test/gemm-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS + [":packing"],
+)
+
+xnnpack_unit_test(
name = "qs8_igemm_minmax_gemmlowp_test",
timeout = "moderate",
srcs = [
@@ -6936,6 +6975,17 @@
)
xnnpack_unit_test(
+ name = "qs8_igemm_minmax_fp32_test",
+ timeout = "moderate",
+ srcs = [
+ "test/qs8-igemm-minmax-fp32.cc",
+ "test/gemm-microkernel-tester.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS + [":packing"],
+)
+
+xnnpack_unit_test(
name = "qs8_requantization_test",
srcs = [
"src/xnnpack/requantization-stubs.h",