Support FP32 requantization in AVX2 QS8 microkernels

PiperOrigin-RevId: 375822588
diff --git a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index de48d33..9741b73 100644
--- a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -148,13 +148,13 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -166,7 +166,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -178,8 +178,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);