Scalar implementations of QU8 GEMM/IGEMM microkernels

PiperOrigin-RevId: 385000129
diff --git a/src/qc8-igemm/gen/1x4-minmax-fp32-scalar-lrint.c b/src/qc8-igemm/gen/1x4-minmax-fp32-scalar-lrint.c
index 0b72bd6..a632b3e 100644
--- a/src/qc8-igemm/gen/1x4-minmax-fp32-scalar-lrint.c
+++ b/src/qc8-igemm/gen/1x4-minmax-fp32-scalar-lrint.c
@@ -45,7 +45,7 @@
     int32_t vacc0x1 = ((const int32_t*) w)[1];
     int32_t vacc0x2 = ((const int32_t*) w)[2];
     int32_t vacc0x3 = ((const int32_t*) w)[3];
-    w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
+    w = (const void*) ((const int32_t*) w + 4);
 
     size_t p = ks;
     do {
@@ -60,11 +60,11 @@
       do {
         const int32_t va0 = (int32_t) *a0++;
 
-        const int32_t vb0 = ((const int8_t*) w)[0];
-        const int32_t vb1 = ((const int8_t*) w)[1];
-        const int32_t vb2 = ((const int8_t*) w)[2];
-        const int32_t vb3 = ((const int8_t*) w)[3];
-        w = (const void*) ((uintptr_t) w + 4 * sizeof(int8_t));
+        const int32_t vb0 = (int32_t) ((const int8_t*) w)[0];
+        const int32_t vb1 = (int32_t) ((const int8_t*) w)[1];
+        const int32_t vb2 = (int32_t) ((const int8_t*) w)[2];
+        const int32_t vb3 = (int32_t) ((const int8_t*) w)[3];
+        w = (const void*) ((const int8_t*) w + 4);
 
         vacc0x0 += va0 * vb0;
         vacc0x1 += va0 * vb1;