Rename 4x8 QU8 NEON dot product microkernel from ld64 to ld128

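- rename ld64 to ld128 in filenames and function names
- reorder the aarch64 neondot benchmark registrations in
  bench/qu8-gemm-e2e.cc and bench/qu8-gemm.cc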

PiperOrigin-RevId: 394751366
diff --git a/bench/qu8-gemm-e2e.cc b/bench/qu8-gemm-e2e.cc
index a4e8a1a..0c833f9 100644
--- a/bench/qu8-gemm-e2e.cc
+++ b/bench/qu8-gemm-e2e.cc
@@ -98,10 +98,10 @@
       4 /* mr */, 16  /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
-  static void qu8_gemm_4x8c4__aarch64_neondot_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qu8_gemm_4x8c4__aarch64_neondot_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld64,
-      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld64,
+      xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128,
+      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128,
       xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
       xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
       xnn_init_qu8_conv_minmax_rndnu_neon_params,
@@ -118,10 +118,10 @@
       4 /* mr */, 8  /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
-  BENCHMARK_QU8_END2END(qu8_gemm_4x16c4__aarch64_neondot_ld128);
-  BENCHMARK_QU8_END2END(qu8_gemm_4x16c4__aarch64_neondot_cortex_a55);
-  BENCHMARK_QU8_END2END(qu8_gemm_4x8c4__aarch64_neondot_ld64);
   BENCHMARK_QU8_END2END(qu8_gemm_4x8c4__aarch64_neondot_cortex_a55);
+  BENCHMARK_QU8_END2END(qu8_gemm_4x16c4__aarch64_neondot_cortex_a55);
+  BENCHMARK_QU8_END2END(qu8_gemm_4x8c4__aarch64_neondot_ld128);
+  BENCHMARK_QU8_END2END(qu8_gemm_4x16c4__aarch64_neondot_ld128);
 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
diff --git a/bench/qu8-gemm.cc b/bench/qu8-gemm.cc
index 8f6c964..3f3036e 100644
--- a/bench/qu8-gemm.cc
+++ b/bench/qu8-gemm.cc
@@ -311,9 +311,9 @@
       xnn_init_qu8_conv_minmax_rndnu_neon_params,
       4, 16, 4, 1, benchmark::utils::CheckNEONDOT);
   }
-  static void qu8_gemm_4x8c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) {
+  static void qu8_gemm_4x8c4__aarch64_neondot_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state,
-      xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld64,
+      xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128,
       xnn_init_qu8_conv_minmax_rndnu_neon_params,
       4, 8, 4, 1,
       benchmark::utils::CheckNEONDOT);
@@ -352,10 +352,10 @@
       4, 16, 1, 1,
       benchmark::utils::CheckNEON);
   }
-  BENCHMARK_GEMM(qu8_gemm_4x16c4__aarch64_neondot_ld128)
-  BENCHMARK_GEMM(qu8_gemm_4x16c4__aarch64_neondot_cortex_a55)
-  BENCHMARK_GEMM(qu8_gemm_4x8c4__aarch64_neondot_ld64)
   BENCHMARK_GEMM(qu8_gemm_4x8c4__aarch64_neondot_cortex_a55)
+  BENCHMARK_GEMM(qu8_gemm_4x16c4__aarch64_neondot_cortex_a55)
+  BENCHMARK_GEMM(qu8_gemm_4x8c4__aarch64_neondot_ld128)
+  BENCHMARK_GEMM(qu8_gemm_4x16c4__aarch64_neondot_ld128)
   BENCHMARK_GEMM(qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53)
   BENCHMARK_GEMM(qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53)
   BENCHMARK_GEMM(qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a75)