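Add X8 LUT NEON microkernels

In BUILD.bazel, add the TBX-based x8-lut NEON kernels (x16, x32, x48 and x64
variants) to the AArch64-only source list. Rename the
*_AARCH64_NEONFMA_MICROKERNEL_SRCS lists to *_AARCH64_NEON_MICROKERNEL_SRCS
and move them from the aarch64_srcs of the NEONFMA targets to the
aarch64_srcs of the NEON targets.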

PiperOrigin-RevId: 395833468
diff --git a/BUILD.bazel b/BUILD.bazel
index 3bfad3d..6a68ca6 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -2899,7 +2899,7 @@
     "src/math/sqrt-neonfma-nr3fma.c",
 ]
 
-PROD_AARCH64_NEONFMA_MICROKERNEL_SRCS = [
+PROD_AARCH64_NEON_MICROKERNEL_SRCS = [
     "src/f32-conv-hwc2chw/3x3s2p1c3x4-neonfma-2x2.c",
     "src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-3x4.c",
     "src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-2x4-acc2.c",
@@ -2919,7 +2919,7 @@
     "src/f32-vsqrt/gen/neon-sqrt-x4.c",
 ]
 
-ALL_AARCH64_NEONFMA_MICROKERNEL_SRCS = [
+ALL_AARCH64_NEON_MICROKERNEL_SRCS = [
     "src/f32-conv-hwc/gen/3x3s2p0p1c3x4-neonfma-2x1.c",
     "src/f32-conv-hwc/gen/3x3s2p0p1c3x4-neonfma-2x2.c",
     "src/f32-conv-hwc/gen/3x3s2p0p1c3x8-neonfma-2x1.c",
@@ -3032,6 +3032,10 @@
     "src/math/sigmoid-neonfma-rr2-lut64-p2-div.c",
     "src/math/sigmoid-neonfma-rr2-lut2048-p1-div.c",
     "src/math/sigmoid-neonfma-rr2-p5-div.c",
+    "src/x8-lut/gen/lut-neon-tbx128x4-x16.c",
+    "src/x8-lut/gen/lut-neon-tbx128x4-x32.c",
+    "src/x8-lut/gen/lut-neon-tbx128x4-x48.c",
+    "src/x8-lut/gen/lut-neon-tbx128x4-x64.c",
 ]
 
 PROD_NEONV8_MICROKERNEL_SRCS = [
@@ -6020,7 +6024,7 @@
         "-mfpu=neon",
     ],
     aarch32_srcs = ALL_NEON_MICROKERNEL_SRCS,
-    aarch64_srcs = ALL_NEON_MICROKERNEL_SRCS,
+    aarch64_srcs = ALL_NEON_MICROKERNEL_SRCS + ALL_AARCH64_NEON_MICROKERNEL_SRCS,
     gcc_copts = xnnpack_gcc_std_copts(),
     msvc_copts = xnnpack_msvc_std_copts(),
     deps = [
@@ -6039,7 +6043,7 @@
         "-mfpu=neon",
     ],
     aarch32_srcs = PROD_NEON_MICROKERNEL_SRCS,
-    aarch64_srcs = PROD_NEON_MICROKERNEL_SRCS,
+    aarch64_srcs = PROD_NEON_MICROKERNEL_SRCS + PROD_AARCH64_NEON_MICROKERNEL_SRCS,
     gcc_copts = xnnpack_gcc_std_copts(),
     msvc_copts = xnnpack_msvc_std_copts(),
     deps = [
@@ -6058,7 +6062,7 @@
         "-mfpu=neon",
     ],
     aarch32_srcs = ALL_NEON_MICROKERNEL_SRCS,
-    aarch64_srcs = ALL_NEON_MICROKERNEL_SRCS,
+    aarch64_srcs = ALL_NEON_MICROKERNEL_SRCS + ALL_AARCH64_NEON_MICROKERNEL_SRCS,
     copts = [
         "-UNDEBUG",
         "-DXNN_TEST_MODE=1",
@@ -6081,7 +6085,7 @@
         "-mfpu=neon-vfpv4",
     ],
     aarch32_srcs = ALL_NEONFMA_MICROKERNEL_SRCS,
-    aarch64_srcs = ALL_NEONFMA_MICROKERNEL_SRCS + ALL_AARCH64_NEONFMA_MICROKERNEL_SRCS,
+    aarch64_srcs = ALL_NEONFMA_MICROKERNEL_SRCS,
     apple_aarch32_copts = [
         "-mcpu=swift",
         "-mtune=generic",
@@ -6104,7 +6108,7 @@
         "-mfpu=neon-vfpv4",
     ],
     aarch32_srcs = PROD_NEONFMA_MICROKERNEL_SRCS,
-    aarch64_srcs = PROD_NEONFMA_MICROKERNEL_SRCS + PROD_AARCH64_NEONFMA_MICROKERNEL_SRCS,
+    aarch64_srcs = PROD_NEONFMA_MICROKERNEL_SRCS,
     apple_aarch32_copts = [
         "-mcpu=swift",
         "-mtune=generic",
@@ -6127,7 +6131,7 @@
         "-mfpu=neon-vfpv4",
     ],
     aarch32_srcs = ALL_NEONFMA_MICROKERNEL_SRCS,
-    aarch64_srcs = ALL_NEONFMA_MICROKERNEL_SRCS + ALL_AARCH64_NEONFMA_MICROKERNEL_SRCS,
+    aarch64_srcs = ALL_NEONFMA_MICROKERNEL_SRCS,
     apple_aarch32_copts = [
         "-mcpu=swift",
         "-mtune=generic",