C2S4 QS8 Neon GEMM/IGEMM microkernels.

- Rename existing S4C2 to C2S4 as it is a shuffle variant of the C2 dup microkernel.
- Add QC8 microkernels
- Add all sizes.  Same set of microkernels as C2 and C4, except no gemmlowp.

PiperOrigin-RevId: 408950530
diff --git a/test/qs8-igemm-minmax-fp32.yaml b/test/qs8-igemm-minmax-fp32.yaml
index 5dd77a9..1024cef 100644
--- a/test/qs8-igemm-minmax-fp32.yaml
+++ b/test/qs8-igemm-minmax-fp32.yaml
@@ -3,6 +3,18 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal_padal
+  init: xnn_init_qs8_conv_minmax_fp32_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal_padal
+  init: xnn_init_qs8_conv_minmax_fp32_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal_padal
+  init: xnn_init_qs8_conv_minmax_fp32_neonv8_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal_padal
+  init: xnn_init_qs8_conv_minmax_fp32_neonv8_params
+  k-block: 16
 - name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_padal_dup
   init: xnn_init_qs8_conv_minmax_fp32_neon_params
   k-block: 16