NEON variants of QS8 VADD[C] microkernels

PiperOrigin-RevId: 329788165
diff --git a/src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x32.c b/src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x32.c
index 6d056b8..efd2962 100644
--- a/src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x32.c
+++ b/src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x32.c
@@ -140,7 +140,7 @@
       if XNN_LIKELY(n >= (8 * sizeof(int8_t))) {
         _mm_storel_epi64((__m128i*) output, vout0123456701234567);
         output += 8;
-        n -= 8;
+        n -= 8 * sizeof(int8_t);
       } else {
         if (n & (4 * sizeof(int8_t))) {
           *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);