Add 4x16 QS8 microkernels for Cortex-A53
- GEMM and IGEMM variants
- with and without prefetch (PRFM)
PiperOrigin-RevId: 374297582
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7105da2..c318a55 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -631,20 +631,20 @@
src/qs8-gavgpool/gen/7x-minmax-scalar-c2.c
src/qs8-gavgpool/gen/7x-minmax-scalar-c4.c
src/qs8-gemm/gen/1x2-minmax-scalar.c
- src/qs8-gemm/gen/2x2-minmax-scalar.c
- src/qs8-gemm/gen/3x2-minmax-scalar.c
- src/qs8-gemm/gen/4x2-minmax-scalar.c
src/qs8-gemm/gen/1x4-minmax-scalar.c
+ src/qs8-gemm/gen/2x2-minmax-scalar.c
src/qs8-gemm/gen/2x4-minmax-scalar.c
+ src/qs8-gemm/gen/3x2-minmax-scalar.c
src/qs8-gemm/gen/3x4-minmax-scalar.c
+ src/qs8-gemm/gen/4x2-minmax-scalar.c
src/qs8-gemm/gen/4x4-minmax-scalar.c
src/qs8-igemm/gen/1x2-minmax-scalar.c
- src/qs8-igemm/gen/2x2-minmax-scalar.c
- src/qs8-igemm/gen/3x2-minmax-scalar.c
- src/qs8-igemm/gen/4x2-minmax-scalar.c
src/qs8-igemm/gen/1x4-minmax-scalar.c
+ src/qs8-igemm/gen/2x2-minmax-scalar.c
src/qs8-igemm/gen/2x4-minmax-scalar.c
+ src/qs8-igemm/gen/3x2-minmax-scalar.c
src/qs8-igemm/gen/3x4-minmax-scalar.c
+ src/qs8-igemm/gen/4x2-minmax-scalar.c
src/qs8-igemm/gen/4x4-minmax-scalar.c
src/qs8-requantization/fp32-scalar-lrintf.c
src/qs8-requantization/fp32-scalar-magic.c
@@ -2992,25 +2992,29 @@
src/qs8-gemm/4x16c4-aarch64-neondot-cortex-a55.S
src/qs8-gemm/4x16c4-aarch64-neondot-ld32.S
src/qs8-gemm/4x16c4-aarch64-neondot-ld64.S
- src/qs8-gemm/gen/1x8c8-aarch64-neon-mlal-padal.S
- src/qs8-gemm/gen/1x8c8-aarch64-neon-mlal-padal-prfm.S
src/qs8-gemm/gen/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S
src/qs8-gemm/gen/1x8c8-aarch64-neon-mlal-padal-prfm-cortex-a53.S
+ src/qs8-gemm/gen/1x8c8-aarch64-neon-mlal-padal-prfm.S
+ src/qs8-gemm/gen/1x8c8-aarch64-neon-mlal-padal.S
src/qs8-gemm/gen/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S
src/qs8-gemm/gen/2x8c8-aarch64-neon-mlal-padal-prfm-cortex-a53.S
src/qs8-gemm/gen/2x8c8-aarch64-neon-mlal-padal-prfm.S
src/qs8-gemm/gen/2x8c8-aarch64-neon-mlal-padal.S
+ src/qs8-gemm/gen/4x16-aarch64-neon-mlal-lane-cortex-a53.S
+ src/qs8-gemm/gen/4x16-aarch64-neon-mlal-lane-prfm-cortex-a53.S
src/qs8-igemm/2x8c16-aarch64-neon-mlal-padal.S
src/qs8-igemm/4x16c4-aarch64-neondot-cortex-a55.S
src/qs8-igemm/4x16c4-aarch64-neondot-ld64.S
- src/qs8-igemm/gen/1x8c8-aarch64-neon-mlal-padal.S
- src/qs8-igemm/gen/1x8c8-aarch64-neon-mlal-padal-prfm.S
src/qs8-igemm/gen/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S
src/qs8-igemm/gen/1x8c8-aarch64-neon-mlal-padal-prfm-cortex-a53.S
+ src/qs8-igemm/gen/1x8c8-aarch64-neon-mlal-padal-prfm.S
+ src/qs8-igemm/gen/1x8c8-aarch64-neon-mlal-padal.S
src/qs8-igemm/gen/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S
src/qs8-igemm/gen/2x8c8-aarch64-neon-mlal-padal-prfm-cortex-a53.S
src/qs8-igemm/gen/2x8c8-aarch64-neon-mlal-padal-prfm.S
- src/qs8-igemm/gen/2x8c8-aarch64-neon-mlal-padal.S)
+ src/qs8-igemm/gen/2x8c8-aarch64-neon-mlal-padal.S
+ src/qs8-igemm/gen/4x16-aarch64-neon-mlal-lane-cortex-a53.S
+ src/qs8-igemm/gen/4x16-aarch64-neon-mlal-lane-prfm-cortex-a53.S)
SET(XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SCALAR_MICROKERNEL_SRCS})
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]" OR IOS_ARCH MATCHES "^armv7")