6x8 ld128 GEMM microkernels

12 new kernels
dup and lane
neon and neonfma
GEMM, GEMMINC and IGEMM

PiperOrigin-RevId: 282661111
diff --git a/test/f32-gemminc.yaml b/test/f32-gemminc.yaml
index 3f8a9a8..4fe377a 100644
--- a/test/f32-gemminc.yaml
+++ b/test/f32-gemminc.yaml
@@ -75,6 +75,8 @@
   k-block: 2
 - name: xnn_f32_gemminc_ukernel_6x8__neon_lane_ld64
   k-block: 2
+- name: xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128
+  k-block: 4
 - name: xnn_f32_gemminc_ukernel_1x8__neonfma_lane_ld64
   k-block: 2
   arch:
@@ -95,6 +97,10 @@
   k-block: 2
   arch:
     - aarch64
+- name: xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128
+  k-block: 4
+  arch:
+    - aarch64
 - name: xnn_f32_gemminc_ukernel_1x8__neon_dup_ld64
   k-block: 2
 - name: xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64
@@ -103,6 +109,8 @@
   k-block: 4
 - name: xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64
   k-block: 2
+- name: xnn_f32_gemminc_ukernel_6x8__neon_dup_ld128
+  k-block: 4
 - name: xnn_f32_gemminc_ukernel_1x8__neonfma_dup_ld64
   k-block: 2
 - name: xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64
@@ -111,6 +119,8 @@
   k-block: 4
 - name: xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64
   k-block: 2
+- name: xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld128
+  k-block: 4
 - name: xnn_f32_gemminc_ukernel_1x8s4__neon
   k-block: 4
 - name: xnn_f32_gemminc_ukernel_4x8s4__neon