QU8 NEON assembly: remove channelwise support

- Remove template support for CHANNELWISE
- Remove push/pop D9
- Sort filenames in BUILD files

PiperOrigin-RevId: 392769881
diff --git a/BUILD.bazel b/BUILD.bazel
index 5c35c62..715c655 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -5648,17 +5648,17 @@
     "src/qs8-igemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld64.S",
     "src/qs8-igemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld128.S",
     "src/qu8-gemm/gen/4x8c4-minmax-rndnu-aarch64-neondot-ld64.S",
-    "src/qu8-gemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld128.S",
     "src/qu8-gemm/gen/4x16-minmax-rndnu-aarch64-neon-mlal-lane-cortex-a53.S",
     "src/qu8-gemm/gen/4x16-minmax-rndnu-aarch64-neon-mlal-lane-cortex-a75.S",
     "src/qu8-gemm/gen/4x16-minmax-rndnu-aarch64-neon-mlal-lane-prfm-cortex-a53.S",
     "src/qu8-gemm/gen/4x16-minmax-rndnu-aarch64-neon-mlal-lane-prfm-cortex-a75.S",
+    "src/qu8-gemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld128.S",
     "src/qu8-igemm/gen/4x8c4-minmax-rndnu-aarch64-neondot-ld64.S",
-    "src/qu8-igemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld128.S",
     "src/qu8-igemm/gen/4x16-minmax-rndnu-aarch64-neon-mlal-lane-cortex-a53.S",
     "src/qu8-igemm/gen/4x16-minmax-rndnu-aarch64-neon-mlal-lane-cortex-a75.S",
     "src/qu8-igemm/gen/4x16-minmax-rndnu-aarch64-neon-mlal-lane-prfm-cortex-a53.S",
     "src/qu8-igemm/gen/4x16-minmax-rndnu-aarch64-neon-mlal-lane-prfm-cortex-a75.S",
+    "src/qu8-igemm/gen/4x16c4-minmax-rndnu-aarch64-neondot-ld128.S",
 ]
 
 INTERNAL_MICROKERNEL_HDRS = [