QS8 GEMM microkernels and infrastructure
- QS8 GEMM microkernels for SSE2/SSSE3/SSE4.1
- Updated unit test generator to support SSSE3 ISA
- Updated GEMM tester to support QS8 GEMM
- Updated weights packing functions to support QS8 GEMM
- Microbenchmark for QS8 GEMM microkernels
PiperOrigin-RevId: 324231357
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 53a4f9f..1cabb8b 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1282,6 +1282,8 @@
src/f32-vrnd/gen/vrndu-sse2-x8.c
src/f32-vrnd/gen/vrndd-sse2-x4.c
src/f32-vrnd/gen/vrndd-sse2-x8.c
+ src/qs8-gemm/1x4c2-minmax-sse2.c
+ src/qs8-gemm/4x4c2-minmax-sse2.c
src/qs8-requantization/fp32-sse2.c
src/qs8-requantization/precise-sse2.c
src/qs8-requantization/q31-sse2.c
@@ -1318,6 +1320,8 @@
src/math/sigmoid-sse2-p5-div.c)
SET(XNNPACK_SSSE3_MICROKERNEL_SRCS
+ src/qs8-gemm/1x4c2-minmax-ssse3.c
+ src/qs8-gemm/4x4c2-minmax-ssse3.c
src/qs8-requantization/precise-ssse3.c
src/qs8-requantization/q31-ssse3.c
src/qu8-requantization/precise-ssse3.c
@@ -1342,6 +1346,8 @@
src/f32-vrnd/gen/vrndu-sse41-x8.c
src/f32-vrnd/gen/vrndd-sse41-x4.c
src/f32-vrnd/gen/vrndd-sse41-x8.c
+ src/qs8-gemm/1x4c2-minmax-sse41.c
+ src/qs8-gemm/4x4c2-minmax-sse41.c
src/qs8-requantization/fp32-sse4.c
src/qs8-requantization/precise-sse4.c
src/qs8-requantization/q31-sse4.c
@@ -3288,6 +3294,15 @@
TARGET_LINK_LIBRARIES(f32-vrsubc-minmax-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
ADD_TEST(f32-vrsubc-minmax-test f32-vrsubc-minmax-test)
+ ADD_EXECUTABLE(qs8-gemm-minmax-test test/qs8-gemm-minmax.cc)
+ SET_TARGET_PROPERTIES(qs8-gemm-minmax-test PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS YES)
+ TARGET_INCLUDE_DIRECTORIES(qs8-gemm-minmax-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(qs8-gemm-minmax-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(qs8-gemm-minmax-test qs8-gemm-minmax-test)
+
ADD_EXECUTABLE(qu8-avgpool-minmax-test test/qu8-avgpool-minmax.cc)
SET_TARGET_PROPERTIES(qu8-avgpool-minmax-test PROPERTIES
CXX_STANDARD 11
@@ -3859,6 +3874,15 @@
TARGET_INCLUDE_DIRECTORIES(f32-vsqrt-bench PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
TARGET_LINK_LIBRARIES(f32-vsqrt-bench PRIVATE XNNPACK fp16 benchmark bench-utils)
+ ADD_EXECUTABLE(qs8-gemm-bench bench/qs8-gemm.cc)
+ SET_TARGET_PROPERTIES(qs8-gemm-bench PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS YES)
+ TARGET_INCLUDE_DIRECTORIES(qs8-gemm-bench PRIVATE src)
+ TARGET_INCLUDE_DIRECTORIES(qs8-gemm-bench PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
+ TARGET_LINK_LIBRARIES(qs8-gemm-bench PRIVATE XNNPACK cpuinfo fp16 benchmark bench-utils)
+
ADD_EXECUTABLE(qu8-gemm-bench bench/qu8-gemm.cc)
SET_TARGET_PROPERTIES(qu8-gemm-bench PROPERTIES
CXX_STANDARD 11