Add xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}c4__neondot (ARMv8.2+dotprod).

PiperOrigin-RevId: 326503942
diff --git a/BUILD.bazel b/BUILD.bazel
index f08a136..157497a 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1618,6 +1618,12 @@
     "src/f16-spmm/gen/32x1-minmax-neonfp16arith-unroll2.c",
 ]
 
+NEONDOT_UKERNELS = [  # Sources used as aarch32_srcs/aarch64_srcs of the neondot_ukernels targets.
+    "src/qs8-gemm/gen/8x8c4-minmax-neondot.c",
+    "src/qs8-gemm/gen/12x8c4-minmax-neondot.c",
+    "src/qs8-gemm/gen/4x16c4-minmax-neondot.c",
+]
+
 SSE_UKERNELS = [
     "src/f32-avgpool/9p8x-minmax-sse-c4.c",
     "src/f32-avgpool/9x-minmax-sse-c4.c",
@@ -3005,6 +3011,42 @@
 )
 
 xnnpack_cc_library(
+    name = "neondot_ukernels",  # Builds NEONDOT_UKERNELS with dot-product arch flags, without the test-mode copts.
+    hdrs = INTERNAL_HDRS,
+    aarch32_copts = ["-march=armv8.2-a+dotprod"],  # NOTE(review): only -march is set; verify whether aarch32 also needs an explicit -mfpu -- TODO confirm.
+    aarch32_srcs = NEONDOT_UKERNELS,  # The same source list is used for both ARM architectures.
+    aarch64_copts = ["-march=armv8.2-a+dotprod"],
+    aarch64_srcs = NEONDOT_UKERNELS,
+    gcc_copts = xnnpack_gcc_std_copts(),
+    msvc_copts = xnnpack_msvc_std_copts(),
+    deps = [
+        ":tables",
+        "@FP16",
+        "@pthreadpool",
+    ],
+)
+
+xnnpack_cc_library(
+    name = "neondot_ukernels_test_mode",  # Same sources, arch flags and deps as neondot_ukernels, plus the copts below.
+    hdrs = INTERNAL_HDRS,
+    aarch32_copts = ["-march=armv8.2-a+dotprod"],  # NOTE(review): only -march is set; verify whether aarch32 also needs an explicit -mfpu -- TODO confirm.
+    aarch32_srcs = NEONDOT_UKERNELS,
+    aarch64_copts = ["-march=armv8.2-a+dotprod"],
+    aarch64_srcs = NEONDOT_UKERNELS,
+    copts = [
+        "-UNDEBUG",  # Undefines NDEBUG, so standard assert() checks stay compiled in.
+        "-DXNN_TEST_MODE=1",  # Defines the XNN_TEST_MODE macro as 1 for these sources.
+    ],
+    gcc_copts = xnnpack_gcc_std_copts(),
+    msvc_copts = xnnpack_msvc_std_copts(),
+    deps = [
+        ":tables",
+        "@FP16",
+        "@pthreadpool",
+    ],
+)
+
+xnnpack_cc_library(
     name = "sse2_ukernels",
     hdrs = INTERNAL_HDRS,
     gcc_copts = xnnpack_gcc_std_copts(),
@@ -3383,6 +3425,7 @@
         ":neon_ukernels",
         ":neonfma_ukernels",
         ":neonv8_ukernels",
+        ":neondot_ukernels",
         ":asm_ukernels",
     ],
     aarch64_deps = [
@@ -3390,6 +3433,7 @@
         ":neonfma_ukernels",
         ":neonv8_ukernels",
         ":neonfp16arith_ukernels",
+        ":neondot_ukernels",
         ":asm_ukernels",
     ],
     generic_deps = [
@@ -3424,6 +3468,7 @@
         ":neon_ukernels_test_mode",
         ":neonfma_ukernels_test_mode",
         ":neonv8_ukernels_test_mode",
+        ":neondot_ukernels_test_mode",
         ":asm_ukernels",
     ],
     aarch64_deps = [
@@ -3431,6 +3476,7 @@
         ":neonfma_ukernels_test_mode",
         ":neonv8_ukernels_test_mode",
         ":neonfp16arith_ukernels_test_mode",
+        ":neondot_ukernels_test_mode",
         ":asm_ukernels",
     ],
     generic_deps = [