// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <xnnpack/assembly.h>

# void xnn_f32_gemm${"inc" if INC else ""}_minmax_ukernel_6x8__aarch64_neonfma${"_prfm" if PREFETCH else ""}_cortex_a75(
#     size_t mr,                x0
#     size_t nc,                x1
#     size_t kc,                x2 / x0
#     const uint8_t*restrict a, x3
#     size_t a_stride,          x4
#     const void*restrict w,    x5
#     uint8_t*restrict c,       x6
#     size_t cm_stride,         x7
#     size_t cn_stride,         [sp] -> (x0)
$if INC:
  #     const float*restrict acc,  [sp + 8] -> x15
  #     const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])  [sp + 16] -> x8
$else:
  #     const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])  [sp + 8] -> x8

# Overview
#   Each pass over K accumulates a tile of up to 6 rows x 8 floats in v20-v31
#   with FMLA, clamps it with FMAX/FMIN against the two values loaded from
#   params, and stores it through the six C row pointers.
#   x0 is recycled: it holds mr during pointer setup, then the remaining-K
#   byte counter (derived from x2), then cn_stride while storing.
#   x2 keeps kc for the whole call; it is used to rewind the A pointers.

# d8-d15, x19-x30 need to be preserved if used. x18 is reserved by the OS.

# A pointers
#  x3 a0
#  x9 a1
# x10 a2
# x11 a3
# x12 a4
#  x4 a5

# C pointers
#  x6 c0
# x16 c1
# x17 c2
# x14 c3
# x13 c4
#  x7 c5

# Vector register usage
# A0   v0  v6
# A1   v1  v7
# A2   v2  v8
# A3   v3  v9
# A4   v4 v10
# A5   v5 v11
# B   v12 v13 v14 v15
# B   v16 v17 v18 v19
# C   v20 v21
# C   v22 v23
# C   v24 v25
# C   v26 v27
# C   v28 v29
# C   v30 v31
# Clamp v6 v7

BEGIN_FUNCTION xnn_f32_gemm${"inc" if INC else ""}_minmax_ukernel_6x8__aarch64_neonfma${"_prfm" if PREFETCH else ""}_cortex_a75

        $if INC:
          # Load acc, params pointer
          LDP     x15, x8, [sp, 8]
        $else:
          # Load params pointer
          LDR     x8, [sp, 8]

        # Clamp A and C pointers / Save d8-d15 on stack
        # The pre-indexed STP reserves 64 bytes of stack; the other three
        # saves are interleaved with the pointer arithmetic.
        # Each CMP is consumed twice: LO for the row right after it and LS for
        # the row after that. This works because ADD, CSEL and STP leave the
        # condition flags untouched.
        # Rows at index >= mr alias the previous row, so they read and write
        # the same memory as the last valid row.
        STP     d8, d9, [sp, -64]!
        CMP     x0, 2                   // if mr < 2
        ADD     x9, x3, x4              // a1 = a0 + a_stride
        ADD     x16, x6, x7             // c1 = c0 + cm_stride
        CSEL    x9, x3, x9, LO          //   a1 = a0
        CSEL    x16, x6, x16, LO        //   c1 = c0

        STP     d10, d11, [sp, 16]
        ADD     x10, x9, x4             // a2 = a1 + a_stride
        ADD     x17, x16, x7            // c2 = c1 + cm_stride
                                        // if mr <= 2
        CSEL    x10, x9, x10, LS        //   a2 = a1
        CSEL    x17, x16, x17, LS       //   c2 = c1

        STP     d12, d13, [sp, 32]
        CMP     x0, 4                   // if mr < 4
        ADD     x11, x10, x4            // a3 = a2 + a_stride
        ADD     x14, x17, x7            // c3 = c2 + cm_stride
        CSEL    x11, x10, x11, LO       //   a3 = a2
        CSEL    x14, x17, x14, LO       //   c3 = c2

        STP     d14, d15, [sp, 48]
        ADD     x12, x11, x4            // a4 = a3 + a_stride
        ADD     x13, x14, x7            // c4 = c3 + cm_stride
                                        // if mr <= 4
        CSEL    x12, x11, x12, LS       //   a4 = a3
        CSEL    x13, x14, x13, LS       //   c4 = c3

        # From here on x4 and x7 stop being strides and become the row-5
        # pointers.
        CMP     x0, 6                   // if mr < 6
        ADD     x4, x12, x4             // a5 = a4 + a_stride
        ADD     x7, x13, x7             // c5 = c4 + cm_stride
        CSEL    x4, x12, x4, LO         //   a5 = a4
        CSEL    x7, x13, x7, LO         //   c5 = c4

0:
        # Top of the outer loop: one iteration per block of 8 output columns.
        # Initialises the 12 accumulators, optionally prefetches, then primes
        # the software pipeline for the main loop.
        $if INC:
          # Load initial accumulators
          LDP     q20, q21, [x15], 32
          LDP     q22, q23, [x15], 32
          LDP     q24, q25, [x15], 32
          LDP     q26, q27, [x15], 32
          LDP     q28, q29, [x15], 32
          LDP     q30, q31, [x15], 32
          $if PREFETCH:
            PRFM    PLDL1KEEP, [x5, 0]      // Prefetch B
            PRFM    PLDL1KEEP, [x5, 64]
            PRFM    PLDL1KEEP, [x5, 128]
            PRFM    PLDL1KEEP, [x5, 192]
            PRFM    PLDL1KEEP, [x3]         // Prefetch A
            PRFM    PLDL1KEEP, [x9]
            PRFM    PLDL1KEEP, [x10]
            PRFM    PLDL1KEEP, [x11]
            PRFM    PLDL1KEEP, [x12]
            PRFM    PLDL1KEEP, [x4]
        $else:
          # Load initial bias from w into accumulators
          # The 8 bias floats are loaded once into v20/v21 and copied to the
          # other five rows; prefetches are interleaved between the copies.
          LDP     q20, q21, [x5], 32
          MOV     v22.16b, v20.16b
          $if PREFETCH:
            PRFM    PLDL1KEEP, [x5, 0]      // Prefetch B
          MOV     v23.16b, v21.16b
          $if PREFETCH:
            PRFM    PLDL1KEEP, [x5, 64]
          MOV     v24.16b, v20.16b
          $if PREFETCH:
            PRFM    PLDL1KEEP, [x5, 128]
          MOV     v25.16b, v21.16b
          $if PREFETCH:
            PRFM    PLDL1KEEP, [x5, 192]
          MOV     v26.16b, v20.16b
          $if PREFETCH:
            PRFM    PLDL1KEEP, [x3]         // Prefetch A
          MOV     v27.16b, v21.16b
          $if PREFETCH:
            PRFM    PLDL1KEEP, [x9]
          MOV     v28.16b, v20.16b
          $if PREFETCH:
            PRFM    PLDL1KEEP, [x10]
          MOV     v29.16b, v21.16b
          $if PREFETCH:
            PRFM    PLDL1KEEP, [x11]
          MOV     v30.16b, v20.16b
          $if PREFETCH:
            PRFM    PLDL1KEEP, [x12]
          MOV     v31.16b, v21.16b
          $if PREFETCH:
            PRFM    PLDL1KEEP, [x4]

        # Are there at least 8 floats (32 bytes) for prologue + epilogue?
        # x0 is recomputed from x2 (kc) on every pass through label 0.
        SUBS    x0, x2, 32              // k = kc - 32
        B.LO    4f                      // fewer than 32 bytes: remainder only

        # Prologue - loads for main loop of 96 FMA
        LDR     q0, [x3], 16
        LDR     q1, [x9], 16
        LDR     q2, [x10], 16
        LDR     q3, [x11], 16
        LDR     q4, [x12], 16
        LDR     q5, [x4], 16
        LDP     q12, q13, [x5], 32      // Fetch 3 B (4th deferred)
        LDP     q14, q15, [x5], 32
        LDP     q16, q17, [x5], 32

        # Are there at least 8 floats (32 bytes) for main loop?
        SUBS    x0, x0, 32
        B.LO    2f                      // no: go straight to the epilogue

# Main loop - 8 floats of A (32 bytes)
# 96 FMA + 12 LDR A + 8 LDP B
# Software pipelined: on entry v0-v5 hold 4 floats of each A row and v12-v17
# hold three of the four B row pairs. Every load below is placed after the
# last use of the register it overwrites, and the loop exits with the same
# registers primed for the next iteration (or for the epilogue at label 2).
1:
        # First group of 4 A. 48 FMA.
        FMLA    v20.4s, v12.4s, v0.s[0]
        LDP     q18, q19, [x5], 32      // Load last B
        FMLA    v22.4s, v12.4s, v1.s[0]
        FMLA    v24.4s, v12.4s, v2.s[0]
        FMLA    v26.4s, v12.4s, v3.s[0]
        FMLA    v28.4s, v12.4s, v4.s[0]
        FMLA    v30.4s, v12.4s, v5.s[0]
        FMLA    v21.4s, v13.4s, v0.s[0]
        FMLA    v23.4s, v13.4s, v1.s[0]
        FMLA    v25.4s, v13.4s, v2.s[0]
        FMLA    v27.4s, v13.4s, v3.s[0]
        FMLA    v29.4s, v13.4s, v4.s[0]

        FMLA    v31.4s, v13.4s, v5.s[0]
        FMLA    v20.4s, v14.4s, v0.s[1]
        $if PREFETCH:
          PRFM    PLDL1KEEP, [x5, 128]      // Prefetch B
        FMLA    v22.4s, v14.4s, v1.s[1]
        FMLA    v24.4s, v14.4s, v2.s[1]
        FMLA    v26.4s, v14.4s, v3.s[1]
        FMLA    v28.4s, v14.4s, v4.s[1]
        $if PREFETCH:
          PRFM    PLDL1KEEP, [x5, 256]
        FMLA    v30.4s, v14.4s, v5.s[1]
        FMLA    v21.4s, v15.4s, v0.s[1]
        FMLA    v23.4s, v15.4s, v1.s[1]
        FMLA    v25.4s, v15.4s, v2.s[1]
        LDR     q6, [x3], 16            // Load next 6 A
        FMLA    v27.4s, v15.4s, v3.s[1]
        FMLA    v29.4s, v15.4s, v4.s[1]
        FMLA    v31.4s, v15.4s, v5.s[1]
        LDR     q7, [x9], 16

        FMLA    v20.4s, v16.4s, v0.s[2]
        FMLA    v22.4s, v16.4s, v1.s[2]
        FMLA    v24.4s, v16.4s, v2.s[2]
        LDR     q8, [x10], 16
        FMLA    v26.4s, v16.4s, v3.s[2]
        FMLA    v28.4s, v16.4s, v4.s[2]
        FMLA    v30.4s, v16.4s, v5.s[2]
        LDR     q9, [x11], 16
        FMLA    v21.4s, v17.4s, v0.s[2]
        FMLA    v23.4s, v17.4s, v1.s[2]
        FMLA    v25.4s, v17.4s, v2.s[2]
        LDR     q10, [x12], 16
        FMLA    v27.4s, v17.4s, v3.s[2]
        FMLA    v29.4s, v17.4s, v4.s[2]
        FMLA    v31.4s, v17.4s, v5.s[2]
        LDR     q11, [x4], 16

        FMLA    v20.4s, v18.4s, v0.s[3]
        FMLA    v22.4s, v18.4s, v1.s[3]
        FMLA    v24.4s, v18.4s, v2.s[3]
        LDP     q12, q13, [x5], 32      // Load 4 B
        FMLA    v26.4s, v18.4s, v3.s[3]
        FMLA    v28.4s, v18.4s, v4.s[3]
        FMLA    v30.4s, v18.4s, v5.s[3]
        LDP     q14, q15, [x5], 32
        FMLA    v21.4s, v19.4s, v0.s[3]
        FMLA    v23.4s, v19.4s, v1.s[3]
        FMLA    v25.4s, v19.4s, v2.s[3]
        LDP     q16, q17, [x5], 32
        FMLA    v27.4s, v19.4s, v3.s[3]
        FMLA    v29.4s, v19.4s, v4.s[3]
        FMLA    v31.4s, v19.4s, v5.s[3]
        LDP     q18, q19, [x5], 32

        # Second group of 4 A. 48 FMA.
        FMLA    v20.4s, v12.4s, v6.s[0]
        FMLA    v22.4s, v12.4s, v7.s[0]
        FMLA    v24.4s, v12.4s, v8.s[0]
        LDR     q0, [x3], 16            // Load next 6 A
        FMLA    v26.4s, v12.4s, v9.s[0]
        FMLA    v28.4s, v12.4s, v10.s[0]
        FMLA    v30.4s, v12.4s, v11.s[0]
        LDR     q1, [x9], 16
        FMLA    v21.4s, v13.4s, v6.s[0]
        FMLA    v23.4s, v13.4s, v7.s[0]
        FMLA    v25.4s, v13.4s, v8.s[0]
        LDR     q2, [x10], 16
        FMLA    v27.4s, v13.4s, v9.s[0]
        FMLA    v29.4s, v13.4s, v10.s[0]
        FMLA    v31.4s, v13.4s, v11.s[0]
        LDR     q3, [x11], 16

        FMLA    v20.4s, v14.4s, v6.s[1]
        FMLA    v22.4s, v14.4s, v7.s[1]
        FMLA    v24.4s, v14.4s, v8.s[1]
        LDR     q4, [x12], 16
        FMLA    v26.4s, v14.4s, v9.s[1]
        FMLA    v28.4s, v14.4s, v10.s[1]
        FMLA    v30.4s, v14.4s, v11.s[1]
        LDR     q5, [x4], 16
        FMLA    v21.4s, v15.4s, v6.s[1]
        FMLA    v23.4s, v15.4s, v7.s[1]
        FMLA    v25.4s, v15.4s, v8.s[1]
        LDP     q12, q13, [x5], 32      // Load next 3 B (not last)
        FMLA    v27.4s, v15.4s, v9.s[1]
        FMLA    v29.4s, v15.4s, v10.s[1]
        FMLA    v31.4s, v15.4s, v11.s[1]
        LDP     q14, q15, [x5], 32

        FMLA    v20.4s, v16.4s, v6.s[2]
        FMLA    v22.4s, v16.4s, v7.s[2]
        FMLA    v24.4s, v16.4s, v8.s[2]
        FMLA    v26.4s, v16.4s, v9.s[2]
        FMLA    v28.4s, v16.4s, v10.s[2]
        FMLA    v30.4s, v16.4s, v11.s[2]
        FMLA    v21.4s, v17.4s, v6.s[2]
        FMLA    v23.4s, v17.4s, v7.s[2]
        FMLA    v25.4s, v17.4s, v8.s[2]
        FMLA    v27.4s, v17.4s, v9.s[2]
        FMLA    v29.4s, v17.4s, v10.s[2]
        FMLA    v31.4s, v17.4s, v11.s[2]
        LDP     q16, q17, [x5], 32

        FMLA    v20.4s, v18.4s, v6.s[3]
        FMLA    v22.4s, v18.4s, v7.s[3]
        SUBS    x0, x0, 32              // k -= 32; flags consumed by B.HS below
        FMLA    v24.4s, v18.4s, v8.s[3]
        FMLA    v26.4s, v18.4s, v9.s[3]
        FMLA    v28.4s, v18.4s, v10.s[3]
        FMLA    v30.4s, v18.4s, v11.s[3]
        FMLA    v21.4s, v19.4s, v6.s[3]
        FMLA    v23.4s, v19.4s, v7.s[3]
        FMLA    v25.4s, v19.4s, v8.s[3]
        FMLA    v27.4s, v19.4s, v9.s[3]
        FMLA    v29.4s, v19.4s, v10.s[3]
        FMLA    v31.4s, v19.4s, v11.s[3]
        B.HS    1b                      // loop while the subtraction did not borrow

# Epilogue - 8 floats of A (32 bytes)
# 96 FMA + 6 LDR A + 5 LDP B
# First block same as main loop. Second block has no preloads.
# Consumes the data already in flight (v0-v5, v12-v17) plus one more 4-float
# step, without fetching anything for a further iteration.
2:
        # First group of 4 A. 48 FMA.
        FMLA    v20.4s, v12.4s, v0.s[0]
        LDP     q18, q19, [x5], 32      // Load last B
        FMLA    v22.4s, v12.4s, v1.s[0]
        FMLA    v24.4s, v12.4s, v2.s[0]
        FMLA    v26.4s, v12.4s, v3.s[0]
        FMLA    v28.4s, v12.4s, v4.s[0]
        FMLA    v30.4s, v12.4s, v5.s[0]
        FMLA    v21.4s, v13.4s, v0.s[0]
        FMLA    v23.4s, v13.4s, v1.s[0]
        FMLA    v25.4s, v13.4s, v2.s[0]
        FMLA    v27.4s, v13.4s, v3.s[0]
        FMLA    v29.4s, v13.4s, v4.s[0]

        FMLA    v31.4s, v13.4s, v5.s[0]
        FMLA    v20.4s, v14.4s, v0.s[1]
        $if PREFETCH:
          PRFM    PLDL1KEEP, [x5, 128]      // Prefetch B
        FMLA    v22.4s, v14.4s, v1.s[1]
        FMLA    v24.4s, v14.4s, v2.s[1]
        FMLA    v26.4s, v14.4s, v3.s[1]
        FMLA    v28.4s, v14.4s, v4.s[1]
        $if PREFETCH:
          PRFM    PLDL1KEEP, [x5, 256]
        FMLA    v30.4s, v14.4s, v5.s[1]
        FMLA    v21.4s, v15.4s, v0.s[1]
        FMLA    v23.4s, v15.4s, v1.s[1]
        FMLA    v25.4s, v15.4s, v2.s[1]
        LDR     q6, [x3], 16            // Load next 6 A
        FMLA    v27.4s, v15.4s, v3.s[1]
        FMLA    v29.4s, v15.4s, v4.s[1]
        FMLA    v31.4s, v15.4s, v5.s[1]
        LDR     q7, [x9], 16

        FMLA    v20.4s, v16.4s, v0.s[2]
        FMLA    v22.4s, v16.4s, v1.s[2]
        FMLA    v24.4s, v16.4s, v2.s[2]
        LDR     q8, [x10], 16
        FMLA    v26.4s, v16.4s, v3.s[2]
        FMLA    v28.4s, v16.4s, v4.s[2]
        FMLA    v30.4s, v16.4s, v5.s[2]
        LDR     q9, [x11], 16
        FMLA    v21.4s, v17.4s, v0.s[2]
        FMLA    v23.4s, v17.4s, v1.s[2]
        FMLA    v25.4s, v17.4s, v2.s[2]
        LDR     q10, [x12], 16
        FMLA    v27.4s, v17.4s, v3.s[2]
        FMLA    v29.4s, v17.4s, v4.s[2]
        FMLA    v31.4s, v17.4s, v5.s[2]
        LDR     q11, [x4], 16

        FMLA    v20.4s, v18.4s, v0.s[3]
        FMLA    v22.4s, v18.4s, v1.s[3]
        FMLA    v24.4s, v18.4s, v2.s[3]
        LDP     q12, q13, [x5], 32      // Load 4 B
        FMLA    v26.4s, v18.4s, v3.s[3]
        FMLA    v28.4s, v18.4s, v4.s[3]
        FMLA    v30.4s, v18.4s, v5.s[3]
        LDP     q14, q15, [x5], 32
        FMLA    v21.4s, v19.4s, v0.s[3]
        FMLA    v23.4s, v19.4s, v1.s[3]
        FMLA    v25.4s, v19.4s, v2.s[3]
        LDP     q16, q17, [x5], 32
        FMLA    v27.4s, v19.4s, v3.s[3]
        FMLA    v29.4s, v19.4s, v4.s[3]
        FMLA    v31.4s, v19.4s, v5.s[3]
        LDP     q18, q19, [x5], 32

        # Second group of 4 A. 48 FMA.
        FMLA    v20.4s, v12.4s, v6.s[0]
        FMLA    v22.4s, v12.4s, v7.s[0]
        FMLA    v24.4s, v12.4s, v8.s[0]
        FMLA    v26.4s, v12.4s, v9.s[0]
        FMLA    v28.4s, v12.4s, v10.s[0]
        FMLA    v30.4s, v12.4s, v11.s[0]
        FMLA    v21.4s, v13.4s, v6.s[0]
        FMLA    v23.4s, v13.4s, v7.s[0]
        FMLA    v25.4s, v13.4s, v8.s[0]
        FMLA    v27.4s, v13.4s, v9.s[0]
        FMLA    v29.4s, v13.4s, v10.s[0]
        FMLA    v31.4s, v13.4s, v11.s[0]

        FMLA    v20.4s, v14.4s, v6.s[1]
        FMLA    v22.4s, v14.4s, v7.s[1]
        FMLA    v24.4s, v14.4s, v8.s[1]
        FMLA    v26.4s, v14.4s, v9.s[1]
        FMLA    v28.4s, v14.4s, v10.s[1]
        FMLA    v30.4s, v14.4s, v11.s[1]
        FMLA    v21.4s, v15.4s, v6.s[1]
        FMLA    v23.4s, v15.4s, v7.s[1]
        FMLA    v25.4s, v15.4s, v8.s[1]
        FMLA    v27.4s, v15.4s, v9.s[1]
        FMLA    v29.4s, v15.4s, v10.s[1]
        FMLA    v31.4s, v15.4s, v11.s[1]

        FMLA    v20.4s, v16.4s, v6.s[2]
        FMLA    v22.4s, v16.4s, v7.s[2]
        FMLA    v24.4s, v16.4s, v8.s[2]
        FMLA    v26.4s, v16.4s, v9.s[2]
        FMLA    v28.4s, v16.4s, v10.s[2]
        FMLA    v30.4s, v16.4s, v11.s[2]
        FMLA    v21.4s, v17.4s, v6.s[2]
        FMLA    v23.4s, v17.4s, v7.s[2]
        FMLA    v25.4s, v17.4s, v8.s[2]
        FMLA    v27.4s, v17.4s, v9.s[2]
        FMLA    v29.4s, v17.4s, v10.s[2]
        FMLA    v31.4s, v17.4s, v11.s[2]

        FMLA    v20.4s, v18.4s, v6.s[3]
        FMLA    v22.4s, v18.4s, v7.s[3]
        FMLA    v24.4s, v18.4s, v8.s[3]
        FMLA    v26.4s, v18.4s, v9.s[3]
        FMLA    v28.4s, v18.4s, v10.s[3]
        FMLA    v30.4s, v18.4s, v11.s[3]
        FMLA    v21.4s, v19.4s, v6.s[3]
        FMLA    v23.4s, v19.4s, v7.s[3]

        # Load min/max values
        # v6 and v7 are dead as A operands after the two FMLA above, so they
        # can be overwritten with the clamp bounds here.
        LD2R    {v6.4s, v7.4s}, [x8]

        FMLA    v25.4s, v19.4s, v8.s[3]
        FMLA    v27.4s, v19.4s, v9.s[3]
        # Is there a remainder?- 4 floats of A (16 bytes) or less
        # Only multiples of 32 were subtracted from x0, so its low 5 bits
        # still equal those of kc.
        TST     x0, 31
        FMLA    v29.4s, v19.4s, v10.s[3]
        FMLA    v31.4s, v19.4s, v11.s[3]
        B.NE    4f                      // flags from TST; FMLA does not change them

# Clamp
# FMAX against v6 and then FMIN against v7 for all 12 accumulators, followed
# by the full-width (8 float) store of each row.
3:
        FMAX    v20.4s, v20.4s, v6.4s
        # Load cn_stride
        # The 9th argument was at [sp] on entry; the 64-byte save area pushed
        # at function entry moves it to [sp, 64]. x0 is free here because the
        # K counter is recomputed at label 0.
        LDR     x0, [sp, 64]
        FMAX    v21.4s, v21.4s, v6.4s
        FMAX    v22.4s, v22.4s, v6.4s
        FMAX    v23.4s, v23.4s, v6.4s
        FMAX    v24.4s, v24.4s, v6.4s
        FMAX    v25.4s, v25.4s, v6.4s
        FMAX    v26.4s, v26.4s, v6.4s
        FMAX    v27.4s, v27.4s, v6.4s
        FMAX    v28.4s, v28.4s, v6.4s
        FMAX    v29.4s, v29.4s, v6.4s
        FMAX    v30.4s, v30.4s, v6.4s
        FMAX    v31.4s, v31.4s, v6.4s
        SUBS    x1, x1, 8               // nc -= 8; flags used by B.LO and B.HI below
        FMIN    v20.4s, v20.4s, v7.4s
        FMIN    v21.4s, v21.4s, v7.4s
        FMIN    v22.4s, v22.4s, v7.4s
        FMIN    v23.4s, v23.4s, v7.4s
        FMIN    v24.4s, v24.4s, v7.4s
        FMIN    v25.4s, v25.4s, v7.4s
        FMIN    v26.4s, v26.4s, v7.4s
        FMIN    v27.4s, v27.4s, v7.4s
        FMIN    v28.4s, v28.4s, v7.4s
        FMIN    v29.4s, v29.4s, v7.4s
        FMIN    v30.4s, v30.4s, v7.4s
        FMIN    v31.4s, v31.4s, v7.4s

        # Store full 6 x 8
        B.LO    7f                      // fewer than 8 columns left: partial store

        # Each row: store 8 floats, advance the C pointer by cn_stride and
        # rewind the A pointer by kc so the next column block rereads A.
        # The INC variant stores rows in the order c5..c0, the other c0..c5.
        $if INC:
          STP     q30, q31, [x7]
          ADD     x7, x7, x0
          SUB     x3, x3, x2              // a0 -= kc
          STP     q28, q29, [x13]
          ADD     x13, x13, x0
          SUB     x9, x9, x2              // a1 -= kc
          STP     q26, q27, [x14]
          ADD     x14, x14, x0
          SUB     x10, x10, x2            // a2 -= kc
          STP     q24, q25, [x17]
          ADD     x17, x17, x0
          SUB     x11, x11, x2            // a3 -= kc
          STP     q22, q23, [x16]
          ADD     x16, x16, x0
          SUB     x12, x12, x2            // a4 -= kc
          STP     q20, q21, [x6]
          ADD     x6, x6, x0
          SUB     x4, x4, x2              // a5 -= kc
        $else:
          STP     q20, q21, [x6]
          ADD     x6, x6, x0
          SUB     x3, x3, x2              // a0 -= kc
          STP     q22, q23, [x16]
          ADD     x16, x16, x0
          SUB     x9, x9, x2              // a1 -= kc
          STP     q24, q25, [x17]
          ADD     x17, x17, x0
          SUB     x10, x10, x2            // a2 -= kc
          STP     q26, q27, [x14]
          ADD     x14, x14, x0
          SUB     x11, x11, x2            // a3 -= kc
          STP     q28, q29, [x13]
          ADD     x13, x13, x0
          SUB     x12, x12, x2            // a4 -= kc
          STP     q30, q31, [x7]
          ADD     x7, x7, x0
          SUB     x4, x4, x2              // a5 -= kc

        # Flags are still those of SUBS x1 above: STP, ADD and SUB do not set
        # them.
        B.HI    0b                      // more columns remain

        # Restore d8-d15 from stack
        LDP     d14, d15, [sp, 48]
        LDP     d12, d13, [sp, 32]
        LDP     d10, d11, [sp, 16]
        LDP     d8, d9, [sp], 64        // post-index releases the 64-byte save area
        RET

4:
        # K remainder handling. Reached either directly when kc < 32 bytes
        # (x0 = kc - 32) or from the epilogue when (x0 & 31) != 0. In both
        # cases only multiples of 32 were subtracted, so bits 4, 3 and 2 of x0
        # equal those of kc and select the 16-, 8- and 4-byte tails.

        # Load min/max values
        # Repeated here because the direct entry path has not loaded them yet.
        LD2R    {v6.4s, v7.4s}, [x8]

        # Is there a remainder?- 4 floats of A (16 bytes)
        TBZ     x0, 4, 5f

        # Remainder- 4 floats of A (16 bytes)
        # Load A
        LDR     q0, [x3], 16
        LDR     q1, [x9], 16
        LDR     q2, [x10], 16
        LDR     q3, [x11], 16
        LDR     q4, [x12], 16
        LDR     q5, [x4], 16
        # Load B
        LDP     q12, q13, [x5], 32
        LDP     q14, q15, [x5], 32
        LDP     q16, q17, [x5], 32
        LDP     q18, q19, [x5], 32

        FMLA    v20.4s, v12.4s, v0.s[0]
        FMLA    v22.4s, v12.4s, v1.s[0]
        FMLA    v24.4s, v12.4s, v2.s[0]
        FMLA    v26.4s, v12.4s, v3.s[0]
        FMLA    v28.4s, v12.4s, v4.s[0]
        FMLA    v30.4s, v12.4s, v5.s[0]
        FMLA    v21.4s, v13.4s, v0.s[0]
        FMLA    v23.4s, v13.4s, v1.s[0]
        FMLA    v25.4s, v13.4s, v2.s[0]
        FMLA    v27.4s, v13.4s, v3.s[0]
        FMLA    v29.4s, v13.4s, v4.s[0]
        FMLA    v31.4s, v13.4s, v5.s[0]

        FMLA    v20.4s, v14.4s, v0.s[1]
        FMLA    v22.4s, v14.4s, v1.s[1]
        FMLA    v24.4s, v14.4s, v2.s[1]
        FMLA    v26.4s, v14.4s, v3.s[1]
        FMLA    v28.4s, v14.4s, v4.s[1]
        FMLA    v30.4s, v14.4s, v5.s[1]
        FMLA    v21.4s, v15.4s, v0.s[1]
        FMLA    v23.4s, v15.4s, v1.s[1]
        FMLA    v25.4s, v15.4s, v2.s[1]
        FMLA    v27.4s, v15.4s, v3.s[1]
        FMLA    v29.4s, v15.4s, v4.s[1]
        FMLA    v31.4s, v15.4s, v5.s[1]

        FMLA    v20.4s, v16.4s, v0.s[2]
        FMLA    v22.4s, v16.4s, v1.s[2]
        FMLA    v24.4s, v16.4s, v2.s[2]
        FMLA    v26.4s, v16.4s, v3.s[2]
        FMLA    v28.4s, v16.4s, v4.s[2]
        FMLA    v30.4s, v16.4s, v5.s[2]
        FMLA    v21.4s, v17.4s, v0.s[2]
        FMLA    v23.4s, v17.4s, v1.s[2]
        FMLA    v25.4s, v17.4s, v2.s[2]
        FMLA    v27.4s, v17.4s, v3.s[2]
        FMLA    v29.4s, v17.4s, v4.s[2]
        FMLA    v31.4s, v17.4s, v5.s[2]

        FMLA    v20.4s, v18.4s, v0.s[3]
        FMLA    v22.4s, v18.4s, v1.s[3]
        FMLA    v24.4s, v18.4s, v2.s[3]
        FMLA    v26.4s, v18.4s, v3.s[3]
        FMLA    v28.4s, v18.4s, v4.s[3]
        FMLA    v30.4s, v18.4s, v5.s[3]
        FMLA    v21.4s, v19.4s, v0.s[3]
        FMLA    v23.4s, v19.4s, v1.s[3]
        FMLA    v25.4s, v19.4s, v2.s[3]
        FMLA    v27.4s, v19.4s, v3.s[3]
        FMLA    v29.4s, v19.4s, v4.s[3]
        FMLA    v31.4s, v19.4s, v5.s[3]

# Is there a remainder?- 2 floats of A (8 bytes)
# Bit 3 of x0 is set when 8 bytes of every A row are still unprocessed.
5:
        TBZ     x0, 3, 6f

        # Remainder- 2 floats of A (8 bytes)
        # Load A
        # Each 64-bit load supplies lanes 0 and 1, the only lanes used below.
        LDR     d0, [x3], 8
        LDR     d1, [x9], 8
        LDR     d2, [x10], 8
        LDR     d3, [x11], 8
        LDR     d4, [x12], 8
        LDR     d5, [x4], 8
        # Load B
        LDP     q12, q13, [x5], 32
        LDP     q14, q15, [x5], 32

        FMLA    v20.4s, v12.4s, v0.s[0]
        FMLA    v22.4s, v12.4s, v1.s[0]
        FMLA    v24.4s, v12.4s, v2.s[0]
        FMLA    v26.4s, v12.4s, v3.s[0]
        FMLA    v28.4s, v12.4s, v4.s[0]
        FMLA    v30.4s, v12.4s, v5.s[0]
        FMLA    v21.4s, v13.4s, v0.s[0]
        FMLA    v23.4s, v13.4s, v1.s[0]
        FMLA    v25.4s, v13.4s, v2.s[0]
        FMLA    v27.4s, v13.4s, v3.s[0]
        FMLA    v29.4s, v13.4s, v4.s[0]
        FMLA    v31.4s, v13.4s, v5.s[0]

        FMLA    v20.4s, v14.4s, v0.s[1]
        FMLA    v22.4s, v14.4s, v1.s[1]
        FMLA    v24.4s, v14.4s, v2.s[1]
        FMLA    v26.4s, v14.4s, v3.s[1]
        FMLA    v28.4s, v14.4s, v4.s[1]
        FMLA    v30.4s, v14.4s, v5.s[1]
        FMLA    v21.4s, v15.4s, v0.s[1]
        FMLA    v23.4s, v15.4s, v1.s[1]
        FMLA    v25.4s, v15.4s, v2.s[1]
        FMLA    v27.4s, v15.4s, v3.s[1]
        FMLA    v29.4s, v15.4s, v4.s[1]
        FMLA    v31.4s, v15.4s, v5.s[1]

# Is there a remainder?- 1 float of A (4 bytes)
# Bit 2 of x0 is set when one last float of every A row is unprocessed;
# otherwise accumulation is complete and control returns to the clamp at 3.
6:
        TBZ     x0, 2, 3b

        # Remainder- 1 float of A (4 bytes)
        # Load A
        # Each 32-bit load supplies lane 0, the only lane used below.
        LDR     s0, [x3], 4
        LDR     s1, [x9], 4
        LDR     s2, [x10], 4
        LDR     s3, [x11], 4
        LDR     s4, [x12], 4
        LDR     s5, [x4], 4
        # Load B
        LDP     q12, q13, [x5], 32

        FMLA    v20.4s, v12.4s, v0.s[0]
        FMLA    v22.4s, v12.4s, v1.s[0]
        FMLA    v24.4s, v12.4s, v2.s[0]
        FMLA    v26.4s, v12.4s, v3.s[0]
        FMLA    v28.4s, v12.4s, v4.s[0]
        FMLA    v30.4s, v12.4s, v5.s[0]
        FMLA    v21.4s, v13.4s, v0.s[0]
        FMLA    v23.4s, v13.4s, v1.s[0]
        FMLA    v25.4s, v13.4s, v2.s[0]
        FMLA    v27.4s, v13.4s, v3.s[0]
        FMLA    v29.4s, v13.4s, v4.s[0]
        FMLA    v31.4s, v13.4s, v5.s[0]
        B       3b                      // clamp and store

# Store odd width
# Reached when fewer than 8 columns remained. x1 has already had 8 subtracted
# from it, which leaves its low three bits equal to the remaining column
# count, so bits 2, 1 and 0 select a 4-, 2- and 1-float store per row.
# After each partial store the not-yet-stored lanes are moved down to lane 0.
# The INC variant stores rows in the order c5..c0, the other c0..c5.
7:
        TBZ     x1, 2, 8f
        $if INC:
          STR     q30, [x7], 16
          MOV     v30.16b, v31.16b
          STR     q28, [x13], 16
          MOV     v28.16b, v29.16b
          STR     q26, [x14], 16
          MOV     v26.16b, v27.16b
          STR     q24, [x17], 16
          MOV     v24.16b, v25.16b
          STR     q22, [x16], 16
          MOV     v22.16b, v23.16b
          STR     q20, [x6], 16
          MOV     v20.16b, v21.16b
        $else:
          STR     q20, [x6], 16
          MOV     v20.16b, v21.16b
          STR     q22, [x16], 16
          MOV     v22.16b, v23.16b
          STR     q24, [x17], 16
          MOV     v24.16b, v25.16b
          STR     q26, [x14], 16
          MOV     v26.16b, v27.16b
          STR     q28, [x13], 16
          MOV     v28.16b, v29.16b
          STR     q30, [x7], 16
          MOV     v30.16b, v31.16b
8:
        TBZ     x1, 1, 9f
        # Store 2 floats per row, then move the upper 64 bits down.
        $if INC:
          STR     d30, [x7], 8
          STR     d28, [x13], 8
          DUP     d30, v30.d[1]
          DUP     d28, v28.d[1]
          STR     d26, [x14], 8
          STR     d24, [x17], 8
          DUP     d26, v26.d[1]
          DUP     d24, v24.d[1]
          STR     d22, [x16], 8
          STR     d20, [x6], 8
          DUP     d22, v22.d[1]
          DUP     d20, v20.d[1]
        $else:
          STR     d20, [x6], 8
          STR     d22, [x16], 8
          DUP     d20, v20.d[1]
          DUP     d22, v22.d[1]
          STR     d24, [x17], 8
          STR     d26, [x14], 8
          DUP     d24, v24.d[1]
          DUP     d26, v26.d[1]
          STR     d28, [x13], 8
          STR     d30, [x7], 8
          DUP     d28, v28.d[1]
          DUP     d30, v30.d[1]

9:
        TBZ     x1, 0, 10f
        # Store the last float per row; the pointers are not advanced here.
        $if INC:
          STR     s30, [x7]
          STR     s28, [x13]
          STR     s26, [x14]
          STR     s24, [x17]
          STR     s22, [x16]
          STR     s20, [x6]
        $else:
          STR     s20, [x6]
          STR     s22, [x16]
          STR     s24, [x17]
          STR     s26, [x14]
          STR     s28, [x13]
          STR     s30, [x7]
10:
        # Restore d8-d15 from stack
        LDP     d14, d15, [sp, 48]
        LDP     d12, d13, [sp, 32]
        LDP     d10, d11, [sp, 16]
        LDP     d8, d9, [sp], 64        // post-index releases the 64-byte save area
        RET

END_FUNCTION xnn_f32_gemm${"inc" if INC else ""}_minmax_ukernel_6x8__aarch64_neonfma${"_prfm" if PREFETCH else ""}_cortex_a75

// On ELF targets, emit an empty .note.GNU-stack section (no flags), the
// convention for declaring that this object needs no executable stack.
#ifdef __ELF__
.section ".note.GNU-stack","",%progbits
#endif