Renumber labels in assembly sequentially

PiperOrigin-RevId: 334696570
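
GNU assembler numeric local labels are resolved by direction rather than by value: a
reference such as 1b binds to the nearest 1: looking backward, and 1f to the nearest
1: looking forward, so the numbers only have to match locally. A minimal standalone
sketch of the convention (illustrative AArch64; registers arbitrary, not taken from
this kernel):

        CBZ x0, 2f        // count == 0? skip the loop (2f = nearest 2: ahead)
1:
        SUBS x0, x0, 1    // decrement count
        B.NE 1b           // loop back (1b = nearest 1: behind)
2:
        RET

Because each reference is renumbered together with its label, assigning labels 1
through 9 in source order is a pure rename: the generated code is unchanged and the
control flow reads top to bottom.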
diff --git a/src/f16-gemm/6x8-aarch64-neonfp16arith-ld64.S.in b/src/f16-gemm/6x8-aarch64-neonfp16arith-ld64.S.in
index b4b784b..9c2ddf9 100644
--- a/src/f16-gemm/6x8-aarch64-neonfp16arith-ld64.S.in
+++ b/src/f16-gemm/6x8-aarch64-neonfp16arith-ld64.S.in
@@ -119,7 +119,7 @@
 
         # Are there at least 4 halffloats (8 bytes)?
         SUBS x0, x2, 8  // k = kc - 8
-        B.LO 5f
+        B.LO 3f
 
         # Main loop - 4 halffloats of A (8 bytes)
         # 24 FMA + 6 ld64 A + 4 LDR B
@@ -165,10 +165,10 @@
         B.HS 1b
 
         # Is there a remainder? - 2 halffloats of A (4 bytes)
-        TBNZ x0, 2, 6f
+        TBNZ x0, 2, 4f
         # Is there a remainder? - 1 halffloat of A (2 bytes)
-        TBNZ x0, 1, 7f
-4:
+        TBNZ x0, 1, 5f
+2:
         # Scale and Clamp
         FMUL v20.8h, v20.8h, v6.8h
         # Load params values
@@ -195,7 +195,7 @@
         FMIN v30.8h, v30.8h, v5.8h
 
         # Store full 6 x 8
-        B.LO 8f
+        B.LO 6f
 
         $if INC:
           ST1 {v30.16b},  [x7], x0
@@ -227,9 +227,9 @@
         B.HI 0b
         RET
 
-5:
-        TBZ x0, 2, 7f
-6:
+3:
+        TBZ x0, 2, 5f
+4:
         # Remainder - 2 halffloats of A (4 bytes)
         LDR   s0,  [x3], 4
         LDR  q16, [x5], 16
@@ -254,9 +254,9 @@
         FMLA v28.8h, v17.8h,  v4.h[1]
         FMLA v30.8h, v17.8h,  v5.h[1]
 
-        TBZ x0, 1, 4b
+        TBZ x0, 1, 2b
 
-7:
+5:
         # Remainder - 1 halffloat of A (2 bytes)
         LDR   h0,  [x3], 2
         LDR  q16,  [x5], 16
@@ -271,84 +271,84 @@
         FMLA v26.8h, v16.8h,  v3.h[0]
         FMLA v28.8h, v16.8h,  v4.h[0]
         FMLA v30.8h, v16.8h,  v5.h[0]
-        B 4b
+        B 2b
 
         # Store odd width
+6:
+        TBZ x1, 2, 7f
+        $if INC:
+          STR d30,  [x7], 8
+          DUP d30, v30.d[1]
+          STR d28, [x13], 8
+          DUP d28, v28.d[1]
+          STR d26, [x14], 8
+          DUP d26, v26.d[1]
+          STR d24, [x17], 8
+          DUP d24, v24.d[1]
+          STR d22, [x16], 8
+          DUP d22, v22.d[1]
+          STR d20,  [x6], 8
+          DUP d20, v20.d[1]
+        $else:
+          STR d20,  [x6], 8
+          DUP d20, v20.d[1]
+          STR d22, [x16], 8
+          DUP d22, v22.d[1]
+          STR d24, [x17], 8
+          DUP d24, v24.d[1]
+          STR d26, [x14], 8
+          DUP d26, v26.d[1]
+          STR d28, [x13], 8
+          DUP d28, v28.d[1]
+          STR d30,  [x7], 8
+          DUP d30, v30.d[1]
+
+7:
+        TBZ x1, 1, 8f
+        $if INC:
+          STR s30,  [x7], 4
+          DUP s30, v30.s[1]
+          STR s28, [x13], 4
+          DUP s28, v28.s[1]
+          STR s26, [x14], 4
+          DUP s26, v26.s[1]
+          STR s24, [x17], 4
+          DUP s24, v24.s[1]
+          STR s22, [x16], 4
+          DUP s22, v22.s[1]
+          STR s20,  [x6], 4
+          DUP s20, v20.s[1]
+        $else:
+          STR s20,  [x6], 4
+          DUP s20, v20.s[1]
+          STR s22, [x16], 4
+          DUP s22, v22.s[1]
+          STR s24, [x17], 4
+          DUP s24, v24.s[1]
+          STR s26, [x14], 4
+          DUP s26, v26.s[1]
+          STR s28, [x13], 4
+          DUP s28, v28.s[1]
+          STR s30,  [x7], 4
+          DUP s30, v30.s[1]
+
 8:
-        TBZ x1, 2, 9f
+        TBZ x1, 0, 9f
         $if INC:
-          STR d30,  [x7], 8
-          DUP d30, v30.d[1]
-          STR d28, [x13], 8
-          DUP d28, v28.d[1]
-          STR d26, [x14], 8
-          DUP d26, v26.d[1]
-          STR d24, [x17], 8
-          DUP d24, v24.d[1]
-          STR d22, [x16], 8
-          DUP d22, v22.d[1]
-          STR d20,  [x6], 8
-          DUP d20, v20.d[1]
+          STR h30,  [x7]
+          STR h28, [x13]
+          STR h26, [x14]
+          STR h24, [x17]
+          STR h22, [x16]
+          STR h20,  [x6]
         $else:
-          STR d20,  [x6], 8
-          DUP d20, v20.d[1]
-          STR d22, [x16], 8
-          DUP d22, v22.d[1]
-          STR d24, [x17], 8
-          DUP d24, v24.d[1]
-          STR d26, [x14], 8
-          DUP d26, v26.d[1]
-          STR d28, [x13], 8
-          DUP d28, v28.d[1]
-          STR d30,  [x7], 8
-          DUP d30, v30.d[1]
-
+          STR h20,  [x6]
+          STR h22, [x16]
+          STR h24, [x17]
+          STR h26, [x14]
+          STR h28, [x13]
+          STR h30,  [x7]
 9:
-        TBZ x1, 1, 10f
-        $if INC:
-          STR s30,  [x7], 4
-          DUP s30, v30.s[1]
-          STR s28, [x13], 4
-          DUP s28, v28.s[1]
-          STR s26, [x14], 4
-          DUP s26, v26.s[1]
-          STR s24, [x17], 4
-          DUP s24, v24.s[1]
-          STR s22, [x16], 4
-          DUP s22, v22.s[1]
-          STR s20,  [x6], 4
-          DUP s20, v20.s[1]
-        $else:
-          STR s20,  [x6], 4
-          DUP s20, v20.s[1]
-          STR s22, [x16], 4
-          DUP s22, v22.s[1]
-          STR s24, [x17], 4
-          DUP s24, v24.s[1]
-          STR s26, [x14], 4
-          DUP s26, v26.s[1]
-          STR s28, [x13], 4
-          DUP s28, v28.s[1]
-          STR s30,  [x7], 4
-          DUP s30, v30.s[1]
-
-10:
-        TBZ x1, 0, 11f
-        $if INC:
-          STR h30,  [x7]
-          STR h28, [x13]
-          STR h26, [x14]
-          STR h24, [x17]
-          STR h22, [x16]
-          STR h20,  [x6]
-        $else:
-          STR h20,  [x6]
-          STR h22, [x16]
-          STR h24, [x17]
-          STR h26, [x14]
-          STR h28, [x13]
-          STR h30,  [x7]
-11:
         RET
 
 END_FUNCTION xnn_f16_gemm${"inc" if INC else ""}_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64
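
For context on the renumbered tail labels: the store path peels the odd output width
by testing individual bits of the remaining column count, with TBZ skipping each
chunk whose bit is clear and DUP shifting the unstored upper lanes down for the next,
smaller store. A hedged one-row sketch of the pattern (register choices mirror one
row of the kernel; the real code repeats this across all six rows):

        TBZ x1, 2, 7f       // bit 2 clear? no 4-halffloat (8-byte) chunk
        STR d20, [x6], 8    // store low 4 halffloats, advance the pointer
        DUP d20, v20.d[1]   // move the high half down for the next test
7:
        TBZ x1, 1, 8f       // bit 1 clear? no 2-halffloat (4-byte) chunk
        STR s20, [x6], 4
        DUP s20, v20.s[1]
8:
        TBZ x1, 0, 9f       // bit 0 clear? no trailing halffloat
        STR h20, [x6]
9:
        RET

The $if INC: and $else: template branches emit the same stores in opposite row order;
the bit-test structure is identical in both.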