QS8 C4 Neon GEMM/IGEMM microkernels
- Based on the C2 microkernel, which uses DUP to isolate/duplicate input channels.
- Instead of 2 channels, DUP 4 channels, and use PADAL to accumulate.
- Outside the loop, do a partial sum of 2 channels.
PiperOrigin-RevId: 407440032
diff --git a/test/qs8-igemm-minmax-fp32.yaml b/test/qs8-igemm-minmax-fp32.yaml
index 2fca7db..5dd77a9 100644
--- a/test/qs8-igemm-minmax-fp32.yaml
+++ b/test/qs8-igemm-minmax-fp32.yaml
@@ -3,6 +3,30 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_padal_dup
+ init: xnn_init_qs8_conv_minmax_fp32_neon_params
+ k-block: 16
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_padal_dup
+ init: xnn_init_qs8_conv_minmax_fp32_neon_params
+ k-block: 16
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_padal_dup
+ init: xnn_init_qs8_conv_minmax_fp32_neonv8_params
+ k-block: 16
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_padal_dup
+ init: xnn_init_qs8_conv_minmax_fp32_neonv8_params
+ k-block: 16
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_padal_dup
+ init: xnn_init_qs8_conv_minmax_fp32_neon_params
+ k-block: 16
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_padal_dup
+ init: xnn_init_qs8_conv_minmax_fp32_neon_params
+ k-block: 16
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_padal_dup
+ init: xnn_init_qs8_conv_minmax_fp32_neonv8_params
+ k-block: 16
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_padal_dup
+ init: xnn_init_qs8_conv_minmax_fp32_neonv8_params
+ k-block: 16
- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_padal
init: xnn_init_qs8_conv_minmax_fp32_neonv8_params
k-block: 16
@@ -57,18 +81,6 @@
- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane
init: xnn_init_qs8_conv_minmax_fp32_neonv8_params
k-block: 8
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_padal_dup
- init: xnn_init_qs8_conv_minmax_fp32_neon_params
- k-block: 16
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_padal_dup
- init: xnn_init_qs8_conv_minmax_fp32_neon_params
- k-block: 16
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_padal_dup
- init: xnn_init_qs8_conv_minmax_fp32_neonv8_params
- k-block: 16
-- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_padal_dup
- init: xnn_init_qs8_conv_minmax_fp32_neonv8_params
- k-block: 16
- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal_padal
init: xnn_init_qs8_conv_minmax_fp32_neon_params
k-block: 16