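Fix QS8 IGEMM with FP32 requantization for SSE/AVX/XOP

In test/qs8-igemm-minmax-fp32.yaml the SSE2/SSSE3/SSE4.1/AVX/XOP entries
were listed under their GEMM names (xnn_qs8_gemm_minmax_fp32_ukernel_*).
Rename them to the matching xnn_qs8_igemm_minmax_fp32_ukernel_* names;
the init functions and k-block values are unchanged.
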
PiperOrigin-RevId: 377211275
diff --git a/test/qs8-igemm-minmax-fp32.yaml b/test/qs8-igemm-minmax-fp32.yaml
index 2b6fa36..9f060e5 100644
--- a/test/qs8-igemm-minmax-fp32.yaml
+++ b/test/qs8-igemm-minmax-fp32.yaml
@@ -3,190 +3,190 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse2_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
-- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128
   init: xnn_init_qs8_conv_minmax_fp32_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2