Format whitespace of prefetch instructions in ARM microkernels

PiperOrigin-RevId: 372582678
diff --git a/src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in b/src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in
index fa08c9b..d3c301b 100644
--- a/src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in
+++ b/src/f32-gemm/4x8-aarch64-neonfma-cortex-a75.S.in
@@ -154,19 +154,19 @@
         FMLA v31.4s, v23.4s, v3.s[1]
         FMLA v16.4s, v24.4s, v0.s[2]
         $if PREFETCH:
-          PRFM PLDL1KEEP, [x5, 128]
+          PRFM    PLDL1KEEP, [x5, 128]
         FMLA v17.4s, v25.4s, v0.s[2]
         FMLA v18.4s, v24.4s, v1.s[2]
         $if PREFETCH:
-          PRFM PLDL1KEEP, [x5, 192]
+          PRFM    PLDL1KEEP, [x5, 192]
         FMLA v19.4s, v25.4s, v1.s[2]
         FMLA v28.4s, v24.4s, v2.s[2]
         $if PREFETCH:
-          PRFM PLDL1KEEP, [x5, 256]
+          PRFM    PLDL1KEEP, [x5, 256]
         FMLA v29.4s, v25.4s, v2.s[2]
         FMLA v30.4s, v24.4s, v3.s[2]
         $if PREFETCH:
-          PRFM PLDL1KEEP, [x5, 320]
+          PRFM    PLDL1KEEP, [x5, 320]
         FMLA v31.4s, v25.4s, v3.s[2]
         FMLA v16.4s, v26.4s, v0.s[3]
         FMLA v17.4s, v27.4s, v0.s[3]