QS8 Neon GEMM microkernels with 8-bit multiply and vpadal to accumulate

PiperOrigin-RevId: 353315852
diff --git a/BUILD.bazel b/BUILD.bazel
index 51ed10a..20b2d12 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1698,20 +1698,28 @@
     "src/qs8-gavgpool/gen/7x-minmax-neon-c32-acc2.c",
     "src/qs8-gemm/gen/1x8-minmax-neon-mlal-lane.c",
     "src/qs8-gemm/gen/1x8-minmax-neon-mull-addw-dup.c",
+    "src/qs8-gemm/gen/1x8c2-minmax-neon-mull-padal-dup.c",
     "src/qs8-gemm/gen/1x16-minmax-neon-mlal-lane.c",
     "src/qs8-gemm/gen/1x16-minmax-neon-mull-addw-dup.c",
+    "src/qs8-gemm/gen/1x16c2-minmax-neon-mull-padal-dup.c",
     "src/qs8-gemm/gen/2x8-minmax-neon-mlal-lane.c",
     "src/qs8-gemm/gen/2x8-minmax-neon-mull-addw-dup.c",
+    "src/qs8-gemm/gen/2x8c2-minmax-neon-mull-padal-dup.c",
     "src/qs8-gemm/gen/2x16-minmax-neon-mlal-lane.c",
     "src/qs8-gemm/gen/2x16-minmax-neon-mull-addw-dup.c",
-    "src/qs8-gemm/gen/3x8-minmax-neon-mlal-lane.c",
-    "src/qs8-gemm/gen/3x8-minmax-neon-mull-addw-dup.c",
+    "src/qs8-gemm/gen/2x16c2-minmax-neon-mull-padal-dup.c",
     "src/qs8-gemm/gen/3x16-minmax-neon-mlal-lane.c",
     "src/qs8-gemm/gen/3x16-minmax-neon-mull-addw-dup.c",
+    "src/qs8-gemm/gen/3x16c2-minmax-neon-mull-padal-dup.c",
+    "src/qs8-gemm/gen/3x8-minmax-neon-mlal-lane.c",
+    "src/qs8-gemm/gen/3x8-minmax-neon-mull-addw-dup.c",
+    "src/qs8-gemm/gen/3x8c2-minmax-neon-mull-padal-dup.c",
     "src/qs8-gemm/gen/4x8-minmax-neon-mlal-lane.c",
     "src/qs8-gemm/gen/4x8-minmax-neon-mull-addw-dup.c",
+    "src/qs8-gemm/gen/4x8c2-minmax-neon-mull-padal-dup.c",
     "src/qs8-gemm/gen/4x16-minmax-neon-mlal-lane.c",
     "src/qs8-gemm/gen/4x16-minmax-neon-mull-addw-dup.c",
+    "src/qs8-gemm/gen/4x16c2-minmax-neon-mull-padal-dup.c",
     "src/qs8-igemm/gen/1x8-minmax-neon-mlal-lane.c",
     "src/qs8-igemm/gen/1x16-minmax-neon-mlal-lane.c",
     "src/qs8-igemm/gen/2x8-minmax-neon-mlal-lane.c",