Refactor argument names in QS8 VADD[C] microkernels
Rename the input_x/input_y arguments to input_a/input_b, and rename the dependent local variables accordingly.
PiperOrigin-RevId: 385693221
diff --git a/src/qs8-vadd/gen/minmax-avx-mul32-ld32-x16.c b/src/qs8-vadd/gen/minmax-avx-mul32-ld32-x16.c
index 77e431e..1170d95 100644
--- a/src/qs8-vadd/gen/minmax-avx-mul32-ld32-x16.c
+++ b/src/qs8-vadd/gen/minmax-avx-mul32-ld32-x16.c
@@ -17,14 +17,14 @@
void xnn_qs8_vadd_minmax_ukernel__avx_mul32_ld32_x16(
size_t n,
- const int8_t* input_x,
- const int8_t* input_y,
+ const int8_t* input_a,
+ const int8_t* input_b,
int8_t* output,
const union xnn_qs8_add_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
{
const __m128i vzero_point_product = _mm_load_si128((const __m128i*) params->sse2.zero_point_product);
- const __m128i vx_multiplier = _mm_load_si128((const __m128i*) params->sse2.x_multiplier);
- const __m128i vy_multiplier = _mm_load_si128((const __m128i*) params->sse2.y_multiplier);
+ const __m128i va_multiplier = _mm_load_si128((const __m128i*) params->sse2.x_multiplier);
+ const __m128i vb_multiplier = _mm_load_si128((const __m128i*) params->sse2.y_multiplier);
const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);
@@ -33,26 +33,26 @@
const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
for (; n >= 16 * sizeof(int8_t); n -= 16 * sizeof(int8_t)) {
- const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x));
- const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y));
- const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4));
- const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4));
- const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8));
- const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8));
- const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12));
- const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12));
- input_x += 16;
- input_y += 16;
+ const __m128i va0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_a));
+ const __m128i vb0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_b));
+ const __m128i va4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_a + 4));
+ const __m128i vb4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_b + 4));
+ const __m128i va89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_a + 8));
+ const __m128i vb89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_b + 8));
+ const __m128i vaCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_a + 12));
+ const __m128i vbCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_b + 12));
+ input_a += 16;
+ input_b += 16;
- __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
- __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
- __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx89AB, vx_multiplier));
- __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxCDEF, vx_multiplier));
+ __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(va0123, va_multiplier));
+ __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(va4567, va_multiplier));
+ __m128i vacc89AB = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(va89AB, va_multiplier));
+ __m128i vaccCDEF = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vaCDEF, va_multiplier));
- vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vy0123, vy_multiplier));
- vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier));
- vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vy89AB, vy_multiplier));
- vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vyCDEF, vy_multiplier));
+ vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vb0123, vb_multiplier));
+ vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vb4567, vb_multiplier));
+ vacc89AB = _mm_add_epi32(vacc89AB, _mm_mullo_epi32(vb89AB, vb_multiplier));
+ vaccCDEF = _mm_add_epi32(vaccCDEF, _mm_mullo_epi32(vbCDEF, vb_multiplier));
const __m128i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123));
const __m128i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567));
@@ -80,18 +80,18 @@
}
if XNN_UNLIKELY(n != 0) {
do {
- const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x));
- const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y));
- const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4));
- const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4));
- input_x += 8;
- input_y += 8;
+ const __m128i va0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_a));
+ const __m128i vb0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_b));
+ const __m128i va4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_a + 4));
+ const __m128i vb4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_b + 4));
+ input_a += 8;
+ input_b += 8;
- __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx0123, vx_multiplier));
- __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier));
+ __m128i vacc0123 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(va0123, va_multiplier));
+ __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(va4567, va_multiplier));
- vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vy0123, vy_multiplier));
- vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier));
+ vacc0123 = _mm_add_epi32(vacc0123, _mm_mullo_epi32(vb0123, vb_multiplier));
+ vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vb4567, vb_multiplier));
const __m128i vrem0123 = _mm_add_epi32(_mm_and_si128(vacc0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123));
const __m128i vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567));