X8 LUT NEON microkernels

PiperOrigin-RevId: 395833468
diff --git a/test/x8-lut.cc b/test/x8-lut.cc
index c55e77e..a101c4e 100644
--- a/test/x8-lut.cc
+++ b/test/x8-lut.cc
@@ -194,4 +194,191 @@
       .inplace(true)
       .Test(xnn_x8_lut_ukernel__scalar_x16);
   }
-}
\ No newline at end of file
+}
+
+#if XNN_ARCH_ARM64
+  TEST(X8_LUT__NEON_TBX128X4_X16, batch_eq_16) {  // Single run of the x16 kernel with a batch size of exactly 16.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    LUTMicrokernelTester()
+      .batch_size(16)
+      .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);  // Hands the kernel under test to the tester.
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X16, batch_div_16) {  // Runs the x16 kernel on batch sizes that are multiples of 16.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {  // Visits 32, 48, ..., 144; 16 itself is not in this range.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X16, batch_lt_16) {  // Runs the x16 kernel on every batch size below 16.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 1; batch_size < 16; batch_size++) {  // Visits 1 through 15 inclusive.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X16, batch_gt_16) {  // Runs the x16 kernel on every batch size strictly between 16 and 32.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 17; batch_size < 32; batch_size++) {  // Visits 17 through 31 inclusive.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X16, inplace) {  // Runs the x16 kernel with the tester's inplace option enabled.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {  // Visits 1, 16, 31, 46, 61, 76.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably input and output share one buffer - confirm in LUTMicrokernelTester.
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x16);  // A fresh tester is built for every batch size.
+    }
+  }
+#endif  // XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM64
+  TEST(X8_LUT__NEON_TBX128X4_X32, batch_eq_32) {  // Single run of the x32 kernel with a batch size of exactly 32.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    LUTMicrokernelTester()
+      .batch_size(32)
+      .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);  // Hands the kernel under test to the tester.
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X32, batch_div_32) {  // Runs the x32 kernel on batch sizes that are multiples of 32.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {  // Visits 64, 96, ..., 288; 32 itself is not in this range.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X32, batch_lt_32) {  // Runs the x32 kernel on every batch size below 32.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 1; batch_size < 32; batch_size++) {  // Visits 1 through 31 inclusive.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X32, batch_gt_32) {  // Runs the x32 kernel on every batch size strictly between 32 and 64.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 33; batch_size < 64; batch_size++) {  // Visits 33 through 63 inclusive.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X32, inplace) {  // Runs the x32 kernel with the tester's inplace option enabled.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {  // Visits 1, 32, 63, 94, 125, 156.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably input and output share one buffer - confirm in LUTMicrokernelTester.
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x32);  // A fresh tester is built for every batch size.
+    }
+  }
+#endif  // XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM64
+  TEST(X8_LUT__NEON_TBX128X4_X48, batch_eq_48) {  // Single run of the x48 kernel with a batch size of exactly 48.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    LUTMicrokernelTester()
+      .batch_size(48)
+      .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);  // Hands the kernel under test to the tester.
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X48, batch_div_48) {  // Runs the x48 kernel on batch sizes that are multiples of 48.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {  // Visits 96, 144, ..., 432; 48 itself is not in this range.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X48, batch_lt_48) {  // Runs the x48 kernel on every batch size below 48.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 1; batch_size < 48; batch_size++) {  // Visits 1 through 47 inclusive.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X48, batch_gt_48) {  // Runs the x48 kernel on every batch size strictly between 48 and 96.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 49; batch_size < 96; batch_size++) {  // Visits 49 through 95 inclusive.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X48, inplace) {  // Runs the x48 kernel with the tester's inplace option enabled.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {  // Visits 1, 48, 95, 142, 189, 236.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably input and output share one buffer - confirm in LUTMicrokernelTester.
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x48);  // A fresh tester is built for every batch size.
+    }
+  }
+#endif  // XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM64
+  TEST(X8_LUT__NEON_TBX128X4_X64, batch_eq_64) {  // Single run of the x64 kernel with a batch size of exactly 64.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    LUTMicrokernelTester()
+      .batch_size(64)
+      .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);  // Hands the kernel under test to the tester.
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X64, batch_div_64) {  // Runs the x64 kernel on batch sizes that are multiples of 64.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {  // Visits 128, 192, ..., 576; 64 itself is not in this range.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X64, batch_lt_64) {  // Runs the x64 kernel on every batch size below 64.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 1; batch_size < 64; batch_size++) {  // Visits 1 through 63 inclusive.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X64, batch_gt_64) {  // Runs the x64 kernel on every batch size strictly between 64 and 128.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 65; batch_size < 128; batch_size++) {  // Visits 65 through 127 inclusive.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);  // A fresh tester is built for every batch size.
+    }
+  }
+
+  TEST(X8_LUT__NEON_TBX128X4_X64, inplace) {  // Runs the x64 kernel with the tester's inplace option enabled.
+    TEST_REQUIRES_ARM_NEON;  // NOTE(review): presumably bails out when NEON is unavailable - confirm against the macro definition.
+    for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {  // Visits 1, 64, 127, 190, 253, 316.
+      LUTMicrokernelTester()
+        .batch_size(batch_size)
+        .inplace(true)  // NOTE(review): presumably input and output share one buffer - confirm in LUTMicrokernelTester.
+        .Test(xnn_x8_lut_ukernel__neon_tbx128x4_x64);  // A fresh tester is built for every batch size.
+    }
+  }
+#endif  // XNN_ARCH_ARM64