Rename the zero_point_product params field (and the matching vzero_point_product local) to bias in Quantized Add microkernels
PiperOrigin-RevId: 385704949
diff --git a/src/qs8-vaddc/gen/minmax-avx-mul32-ld32-x32.c b/src/qs8-vaddc/gen/minmax-avx-mul32-ld32-x32.c
index d8c9bc1..1d21d79 100644
--- a/src/qs8-vaddc/gen/minmax-avx-mul32-ld32-x32.c
+++ b/src/qs8-vaddc/gen/minmax-avx-mul32-ld32-x32.c
@@ -30,9 +30,9 @@
const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
- __m128i vzero_point_product = _mm_cvtsi32_si128(params->sse2.b_multiplier[0] * (int32_t) *input_b);
- vzero_point_product = _mm_shuffle_epi32(vzero_point_product, _MM_SHUFFLE(0, 0, 0, 0));
- vzero_point_product = _mm_add_epi32(vzero_point_product, _mm_load_si128((const __m128i*) params->sse2.zero_point_product));
+ __m128i vbias = _mm_cvtsi32_si128(params->sse2.b_multiplier[0] * (int32_t) *input_b);
+ vbias = _mm_shuffle_epi32(vbias, _MM_SHUFFLE(0, 0, 0, 0));
+ vbias = _mm_add_epi32(vbias, _mm_load_si128((const __m128i*) params->sse2.bias));
for (; n >= 32 * sizeof(int8_t); n -= 32 * sizeof(int8_t)) {
const __m128i va0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_a));
const __m128i va4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_a + 4));
@@ -45,14 +45,14 @@
input_a += 32;
input_b += 32;
- __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(va0123, va_multiplier));
- __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(va4567, va_multiplier));
- __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(va89AB, va_multiplier));
- __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vaCDEF, va_multiplier));
- __m128i vaccGHIJ = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vaGHIJ, va_multiplier));
- __m128i vaccKLMN = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vaKLMN, va_multiplier));
- __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vaOPQR, va_multiplier));
- __m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vaSTUV, va_multiplier));
+ __m128i vacc0123 = _mm_add_epi32(vbias, _mm_mullo_epi32(va0123, va_multiplier));
+ __m128i vacc4567 = _mm_add_epi32(vbias, _mm_mullo_epi32(va4567, va_multiplier));
+ __m128i vacc89AB = _mm_add_epi32(vbias, _mm_mullo_epi32(va89AB, va_multiplier));
+ __m128i vaccCDEF = _mm_add_epi32(vbias, _mm_mullo_epi32(vaCDEF, va_multiplier));
+ __m128i vaccGHIJ = _mm_add_epi32(vbias, _mm_mullo_epi32(vaGHIJ, va_multiplier));
+ __m128i vaccKLMN = _mm_add_epi32(vbias, _mm_mullo_epi32(vaKLMN, va_multiplier));
+ __m128i vaccOPQR = _mm_add_epi32(vbias, _mm_mullo_epi32(vaOPQR, va_multiplier));
+ __m128i vaccSTUV = _mm_add_epi32(vbias, _mm_mullo_epi32(vaSTUV, va_multiplier));
const __m128i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123));
const __m128i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567));
@@ -100,8 +100,8 @@
const __m128i va4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_a + 4));
input_a += 8;
- __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(va0123, va_multiplier));
- __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(va4567, va_multiplier));
+ __m128i vacc0123 = _mm_add_epi32(vbias, _mm_mullo_epi32(va0123, va_multiplier));
+ __m128i vacc4567 = _mm_add_epi32(vbias, _mm_mullo_epi32(va4567, va_multiplier));
const __m128i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123));
const __m128i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567));