Code-generate QU8 GEMM and IGEMM microkernels for SSE2/SSSE3/SSE4.1

PiperOrigin-RevId: 382681546
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-fp32-sse41-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-fp32-sse41-ld128.c
index d262753..5a51a7c 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-fp32-sse41-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-fp32-sse41-ld128.c
@@ -179,9 +179,9 @@
         vout = _mm_srli_epi32(vout, 16);
       }
       if (nc & 1) {
-        *((int8_t*) c2) = (int8_t) _mm_extract_epi8(vout, 8);
-        *((int8_t*) c1) = (int8_t) _mm_extract_epi8(vout, 4);
-        *((int8_t*) c0) = (int8_t) _mm_extract_epi8(vout, 0);
+        *c2 = (int8_t) _mm_extract_epi8(vout, 8);
+        *c1 = (int8_t) _mm_extract_epi8(vout, 4);
+        *c0 = (int8_t) _mm_extract_epi8(vout, 0);
       }
 
       nc = 0;