Add LD64 suffix in QS8 GEMM/IGEMM microkernels

LD64 denotes that weights are loaded 64 bits at a time and sign-extended to 128
bits.

PiperOrigin-RevId: 324305250
diff --git a/BUILD.bazel b/BUILD.bazel
index 88c2245..af62d1c 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1640,12 +1640,12 @@
     "src/qs8-requantization/precise-sse2.c",
     "src/qs8-requantization/fp32-sse2.c",
     "src/qs8-requantization/q31-sse2.c",
-    "src/qs8-gemm/gen/1x4c2-minmax-sse2.c",
-    "src/qs8-gemm/gen/4x4c2-minmax-sse2.c",
-    "src/qs8-gemm/gen/1x4c8-minmax-sse2.c",
-    "src/qs8-gemm/gen/2x4c8-minmax-sse2.c",
-    "src/qs8-igemm/gen/1x4c2-minmax-sse2.c",
-    "src/qs8-igemm/gen/4x4c2-minmax-sse2.c",
+    "src/qs8-gemm/gen/1x4c2-minmax-sse2-ld64.c",
+    "src/qs8-gemm/gen/4x4c2-minmax-sse2-ld64.c",
+    "src/qs8-gemm/gen/1x4c8-minmax-sse2-ld64.c",
+    "src/qs8-gemm/gen/2x4c8-minmax-sse2-ld64.c",
+    "src/qs8-igemm/gen/1x4c2-minmax-sse2-ld64.c",
+    "src/qs8-igemm/gen/4x4c2-minmax-sse2-ld64.c",
     "src/qu8-avgpool/9p8x-minmax-sse2-c8.c",
     "src/qu8-avgpool/9x-minmax-sse2-c8.c",
     "src/qu8-igemm/4x4c2-minmax-sse2.c",
@@ -1680,12 +1680,12 @@
 ]
 
 SSSE3_UKERNELS = [
-    "src/qs8-gemm/gen/1x4c2-minmax-ssse3.c",
-    "src/qs8-gemm/gen/4x4c2-minmax-ssse3.c",
-    "src/qs8-gemm/gen/1x4c8-minmax-ssse3.c",
-    "src/qs8-gemm/gen/2x4c8-minmax-ssse3.c",
-    "src/qs8-igemm/gen/1x4c2-minmax-ssse3.c",
-    "src/qs8-igemm/gen/4x4c2-minmax-ssse3.c",
+    "src/qs8-gemm/gen/1x4c2-minmax-ssse3-ld64.c",
+    "src/qs8-gemm/gen/4x4c2-minmax-ssse3-ld64.c",
+    "src/qs8-gemm/gen/1x4c8-minmax-ssse3-ld64.c",
+    "src/qs8-gemm/gen/2x4c8-minmax-ssse3-ld64.c",
+    "src/qs8-igemm/gen/1x4c2-minmax-ssse3-ld64.c",
+    "src/qs8-igemm/gen/4x4c2-minmax-ssse3-ld64.c",
     "src/qs8-requantization/precise-ssse3.c",
     "src/qs8-requantization/q31-ssse3.c",
     "src/qu8-requantization/precise-ssse3.c",
@@ -1711,12 +1711,12 @@
     "src/f32-vrnd/gen/vrndu-sse41-x8.c",
     "src/f32-vrnd/gen/vrndd-sse41-x4.c",
     "src/f32-vrnd/gen/vrndd-sse41-x8.c",
-    "src/qs8-gemm/gen/1x4c2-minmax-sse41.c",
-    "src/qs8-gemm/gen/4x4c2-minmax-sse41.c",
-    "src/qs8-gemm/gen/1x4c8-minmax-sse41.c",
-    "src/qs8-gemm/gen/2x4c8-minmax-sse41.c",
-    "src/qs8-igemm/gen/1x4c2-minmax-sse41.c",
-    "src/qs8-igemm/gen/4x4c2-minmax-sse41.c",
+    "src/qs8-gemm/gen/1x4c2-minmax-sse41-ld64.c",
+    "src/qs8-gemm/gen/4x4c2-minmax-sse41-ld64.c",
+    "src/qs8-gemm/gen/1x4c8-minmax-sse41-ld64.c",
+    "src/qs8-gemm/gen/2x4c8-minmax-sse41-ld64.c",
+    "src/qs8-igemm/gen/1x4c2-minmax-sse41-ld64.c",
+    "src/qs8-igemm/gen/4x4c2-minmax-sse41-ld64.c",
     "src/qs8-requantization/fp32-sse4.c",
     "src/qs8-requantization/precise-sse4.c",
     "src/qs8-requantization/q31-sse4.c",