Code-generate QU8 GEMM and IGEMM microkernels for SSE2/SSSE3/SSE4.1

PiperOrigin-RevId: 382681546
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 23d9d72..46d17a6 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2178,9 +2178,10 @@
   src/qu8-dwconv/up8x9-minmax-gemmlowp-sse2.c
   src/qu8-gavgpool/7p7x-minmax-sse2-c8.c
   src/qu8-gavgpool/7x-minmax-sse2-c8.c
-  src/qu8-gemm/2x4c8-minmax-gemmlowp-sse2.c
-  src/qu8-gemm/4x4c2-minmax-gemmlowp-sse2.c
-  src/qu8-igemm/4x4c2-minmax-gemmlowp-sse2.c
+  src/qu8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
+  src/qu8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
+  src/qu8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
+  src/qu8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
   src/qu8-requantization/fp32-sse2.c
   src/qu8-requantization/gemmlowp-sse2.c
   src/qu8-requantization/rndna-sse2.c
@@ -2270,6 +2271,10 @@
   src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
   src/qs8-requantization/gemmlowp-ssse3.c
   src/qs8-requantization/rndna-ssse3.c
+  src/qu8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
+  src/qu8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
+  src/qu8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
+  src/qu8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
   src/qu8-requantization/gemmlowp-ssse3.c
   src/qu8-requantization/rndna-ssse3.c)
 
@@ -2468,6 +2473,10 @@
   src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x16.c
   src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x24.c
   src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x32.c
+  src/qu8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
+  src/qu8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
+  src/qu8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
+  src/qu8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
   src/qu8-requantization/gemmlowp-sse4.c
   src/qu8-requantization/rndna-sse4.c)