blob: 018f26c54e4778025557efd36212eee9509b2a09 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f16-gemm-minmax.yaml
//   Generator: tools/generate-gemm-test.py
12
13
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <gtest/gtest.h>
15
Marat Dukhan1dadbf72019-10-01 10:46:20 -070016#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
XNNPACK Teamb455b122019-09-27 18:10:33 -070019#include <xnnpack/gemm.h>
20#include <xnnpack/igemm.h>
21#include <xnnpack/ppmm.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070022#include "gemm-microkernel-tester.h"
23
24
Marat Dukhan1dadbf72019-10-01 10:46:20 -070025#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070026 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070027 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
28 GemmMicrokernelTester()
29 .mr(4)
30 .nr(8)
31 .kr(1)
32 .sr(1)
33 .m(4)
34 .n(8)
35 .k(4)
Marat Dukhande06f492020-04-09 00:19:31 -070036 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -070037 }
38
Marat Dukhande06f492020-04-09 00:19:31 -070039 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070040 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
41 GemmMicrokernelTester()
42 .mr(4)
43 .nr(8)
44 .kr(1)
45 .sr(1)
46 .m(4)
47 .n(8)
48 .k(4)
49 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -070050 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -070051 }
52
Marat Dukhande06f492020-04-09 00:19:31 -070053 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070054 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
55 GemmMicrokernelTester()
56 .mr(4)
57 .nr(8)
58 .kr(1)
59 .sr(1)
60 .m(4)
61 .n(8)
62 .k(4)
63 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -070064 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -070065 }
66
Marat Dukhande06f492020-04-09 00:19:31 -070067 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070068 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
69 for (uint32_t m = 1; m <= 4; m++) {
70 for (uint32_t n = 1; n <= 8; n++) {
71 GemmMicrokernelTester()
72 .mr(4)
73 .nr(8)
74 .kr(1)
75 .sr(1)
76 .m(m)
77 .n(n)
78 .k(4)
79 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -070080 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -070081 }
82 }
83 }
84
Marat Dukhande06f492020-04-09 00:19:31 -070085 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070086 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
87 for (uint32_t m = 1; m <= 4; m++) {
88 GemmMicrokernelTester()
89 .mr(4)
90 .nr(8)
91 .kr(1)
92 .sr(1)
93 .m(m)
94 .n(8)
95 .k(4)
96 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -070097 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -070098 }
99 }
100
Marat Dukhande06f492020-04-09 00:19:31 -0700101 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700102 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
103 for (uint32_t n = 1; n <= 8; n++) {
104 GemmMicrokernelTester()
105 .mr(4)
106 .nr(8)
107 .kr(1)
108 .sr(1)
109 .m(4)
110 .n(n)
111 .k(4)
112 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700113 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700114 }
115 }
116
Marat Dukhande06f492020-04-09 00:19:31 -0700117 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700118 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
119 for (size_t k = 1; k < 4; k++) {
120 GemmMicrokernelTester()
121 .mr(4)
122 .nr(8)
123 .kr(1)
124 .sr(1)
125 .m(4)
126 .n(8)
127 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700128 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700129 }
130 }
131
Marat Dukhande06f492020-04-09 00:19:31 -0700132 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700133 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
134 for (size_t k = 1; k < 4; k++) {
135 GemmMicrokernelTester()
136 .mr(4)
137 .nr(8)
138 .kr(1)
139 .sr(1)
140 .m(4)
141 .n(8)
142 .k(k)
143 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700144 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700145 }
146 }
147
Marat Dukhande06f492020-04-09 00:19:31 -0700148 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700149 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
150 for (size_t k = 1; k < 4; k++) {
151 for (uint32_t m = 1; m <= 4; m++) {
152 for (uint32_t n = 1; n <= 8; n++) {
153 GemmMicrokernelTester()
154 .mr(4)
155 .nr(8)
156 .kr(1)
157 .sr(1)
158 .m(m)
159 .n(n)
160 .k(k)
161 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700162 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700163 }
164 }
165 }
166 }
167
Marat Dukhande06f492020-04-09 00:19:31 -0700168 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700169 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
170 for (size_t k = 5; k < 8; k++) {
171 GemmMicrokernelTester()
172 .mr(4)
173 .nr(8)
174 .kr(1)
175 .sr(1)
176 .m(4)
177 .n(8)
178 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700179 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700180 }
181 }
182
Marat Dukhande06f492020-04-09 00:19:31 -0700183 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700184 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
185 for (size_t k = 5; k < 8; k++) {
186 GemmMicrokernelTester()
187 .mr(4)
188 .nr(8)
189 .kr(1)
190 .sr(1)
191 .m(4)
192 .n(8)
193 .k(k)
194 .a_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700195 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700196 }
197 }
198
Marat Dukhande06f492020-04-09 00:19:31 -0700199 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700200 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
201 for (size_t k = 5; k < 8; k++) {
202 for (uint32_t m = 1; m <= 4; m++) {
203 for (uint32_t n = 1; n <= 8; n++) {
204 GemmMicrokernelTester()
205 .mr(4)
206 .nr(8)
207 .kr(1)
208 .sr(1)
209 .m(m)
210 .n(n)
211 .k(k)
212 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700213 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700214 }
215 }
216 }
217 }
218
Marat Dukhande06f492020-04-09 00:19:31 -0700219 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700220 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
221 for (size_t k = 8; k <= 40; k += 4) {
222 GemmMicrokernelTester()
223 .mr(4)
224 .nr(8)
225 .kr(1)
226 .sr(1)
227 .m(4)
228 .n(8)
229 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700230 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700231 }
232 }
233
Marat Dukhande06f492020-04-09 00:19:31 -0700234 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_div_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700235 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
236 for (size_t k = 8; k <= 40; k += 4) {
237 GemmMicrokernelTester()
238 .mr(4)
239 .nr(8)
240 .kr(1)
241 .sr(1)
242 .m(4)
243 .n(8)
244 .k(k)
245 .a_stride(43)
Marat Dukhande06f492020-04-09 00:19:31 -0700246 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700247 }
248 }
249
Marat Dukhande06f492020-04-09 00:19:31 -0700250 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700251 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
252 for (size_t k = 8; k <= 40; k += 4) {
253 for (uint32_t m = 1; m <= 4; m++) {
254 for (uint32_t n = 1; n <= 8; n++) {
255 GemmMicrokernelTester()
256 .mr(4)
257 .nr(8)
258 .kr(1)
259 .sr(1)
260 .m(m)
261 .n(n)
262 .k(k)
263 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700264 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700265 }
266 }
267 }
268 }
269
Marat Dukhande06f492020-04-09 00:19:31 -0700270 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700271 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
272 for (uint32_t n = 9; n < 16; n++) {
273 for (size_t k = 1; k <= 20; k += 5) {
274 GemmMicrokernelTester()
275 .mr(4)
276 .nr(8)
277 .kr(1)
278 .sr(1)
279 .m(4)
280 .n(8)
281 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700282 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700283 }
284 }
285 }
286
Marat Dukhande06f492020-04-09 00:19:31 -0700287 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700288 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
289 for (uint32_t n = 9; n < 16; n++) {
290 for (size_t k = 1; k <= 20; k += 5) {
291 GemmMicrokernelTester()
292 .mr(4)
293 .nr(8)
294 .kr(1)
295 .sr(1)
296 .m(4)
297 .n(8)
298 .k(k)
299 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700300 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700301 }
302 }
303 }
304
Marat Dukhande06f492020-04-09 00:19:31 -0700305 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700306 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
307 for (uint32_t n = 9; n < 16; n++) {
308 for (size_t k = 1; k <= 20; k += 5) {
309 GemmMicrokernelTester()
310 .mr(4)
311 .nr(8)
312 .kr(1)
313 .sr(1)
314 .m(4)
315 .n(n)
316 .k(k)
317 .a_stride(23)
Marat Dukhande06f492020-04-09 00:19:31 -0700318 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700319 }
320 }
321 }
322
Marat Dukhande06f492020-04-09 00:19:31 -0700323 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700324 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
325 for (uint32_t n = 9; n < 16; n++) {
326 for (size_t k = 1; k <= 20; k += 5) {
327 for (uint32_t m = 1; m <= 4; m++) {
328 GemmMicrokernelTester()
329 .mr(4)
330 .nr(8)
331 .kr(1)
332 .sr(1)
333 .m(m)
334 .n(n)
335 .k(k)
336 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700337 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700338 }
339 }
340 }
341 }
342
Marat Dukhande06f492020-04-09 00:19:31 -0700343 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700344 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
345 for (uint32_t n = 16; n <= 24; n += 8) {
346 for (size_t k = 1; k <= 20; k += 5) {
347 GemmMicrokernelTester()
348 .mr(4)
349 .nr(8)
350 .kr(1)
351 .sr(1)
352 .m(4)
353 .n(8)
354 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700355 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700356 }
357 }
358 }
359
Marat Dukhande06f492020-04-09 00:19:31 -0700360 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700361 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
362 for (uint32_t n = 16; n <= 24; n += 8) {
363 for (size_t k = 1; k <= 20; k += 5) {
364 GemmMicrokernelTester()
365 .mr(4)
366 .nr(8)
367 .kr(1)
368 .sr(1)
369 .m(4)
370 .n(n)
371 .k(k)
372 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700373 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700374 }
375 }
376 }
377
Marat Dukhande06f492020-04-09 00:19:31 -0700378 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700379 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
380 for (uint32_t n = 16; n <= 24; n += 8) {
381 for (size_t k = 1; k <= 20; k += 5) {
382 GemmMicrokernelTester()
383 .mr(4)
384 .nr(8)
385 .kr(1)
386 .sr(1)
387 .m(4)
388 .n(n)
389 .k(k)
390 .a_stride(23)
Marat Dukhande06f492020-04-09 00:19:31 -0700391 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700392 }
393 }
394 }
395
Marat Dukhande06f492020-04-09 00:19:31 -0700396 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700397 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
398 for (uint32_t n = 16; n <= 24; n += 8) {
399 for (size_t k = 1; k <= 20; k += 5) {
400 for (uint32_t m = 1; m <= 4; m++) {
401 GemmMicrokernelTester()
402 .mr(4)
403 .nr(8)
404 .kr(1)
405 .sr(1)
406 .m(m)
407 .n(n)
408 .k(k)
409 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700410 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700411 }
412 }
413 }
414 }
415
Marat Dukhande06f492020-04-09 00:19:31 -0700416 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700417 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
418 for (size_t k = 1; k <= 20; k += 5) {
419 for (uint32_t m = 1; m <= 4; m++) {
420 for (uint32_t n = 1; n <= 8; n++) {
421 GemmMicrokernelTester()
422 .mr(4)
423 .nr(8)
424 .kr(1)
425 .sr(1)
426 .m(m)
427 .n(n)
428 .k(k)
429 .cm_stride(11)
430 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700431 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700432 }
433 }
434 }
435 }
436
Marat Dukhande06f492020-04-09 00:19:31 -0700437 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700438 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
439 GemmMicrokernelTester()
440 .mr(4)
441 .nr(8)
442 .kr(1)
443 .sr(1)
444 .m(4)
445 .n(8)
446 .k(4)
447 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700448 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700449 }
450
Marat Dukhande06f492020-04-09 00:19:31 -0700451 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700452 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
453 GemmMicrokernelTester()
454 .mr(4)
455 .nr(8)
456 .kr(1)
457 .sr(1)
458 .m(4)
459 .n(8)
460 .k(4)
461 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700462 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700463 }
464
Marat Dukhande06f492020-04-09 00:19:31 -0700465 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, strided_cm) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700466 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
467 GemmMicrokernelTester()
468 .mr(4)
469 .nr(8)
470 .kr(1)
471 .sr(1)
472 .m(4)
473 .n(8)
474 .k(4)
475 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700476 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700477 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700478#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700479
480
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700481#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -0700482 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700483 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
484 GemmMicrokernelTester()
485 .mr(6)
486 .nr(8)
487 .kr(1)
488 .sr(1)
489 .m(6)
490 .n(8)
491 .k(4)
Marat Dukhande06f492020-04-09 00:19:31 -0700492 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700493 }
494
Marat Dukhande06f492020-04-09 00:19:31 -0700495 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700496 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
497 GemmMicrokernelTester()
498 .mr(6)
499 .nr(8)
500 .kr(1)
501 .sr(1)
502 .m(6)
503 .n(8)
504 .k(4)
505 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700506 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700507 }
508
Marat Dukhande06f492020-04-09 00:19:31 -0700509 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700510 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
511 GemmMicrokernelTester()
512 .mr(6)
513 .nr(8)
514 .kr(1)
515 .sr(1)
516 .m(6)
517 .n(8)
518 .k(4)
519 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700520 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700521 }
522
Marat Dukhande06f492020-04-09 00:19:31 -0700523 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700524 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
525 for (uint32_t m = 1; m <= 6; m++) {
526 for (uint32_t n = 1; n <= 8; n++) {
527 GemmMicrokernelTester()
528 .mr(6)
529 .nr(8)
530 .kr(1)
531 .sr(1)
532 .m(m)
533 .n(n)
534 .k(4)
535 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700536 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700537 }
538 }
539 }
540
Marat Dukhande06f492020-04-09 00:19:31 -0700541 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700542 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
543 for (uint32_t m = 1; m <= 6; m++) {
544 GemmMicrokernelTester()
545 .mr(6)
546 .nr(8)
547 .kr(1)
548 .sr(1)
549 .m(m)
550 .n(8)
551 .k(4)
552 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700553 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700554 }
555 }
556
Marat Dukhande06f492020-04-09 00:19:31 -0700557 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700558 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
559 for (uint32_t n = 1; n <= 8; n++) {
560 GemmMicrokernelTester()
561 .mr(6)
562 .nr(8)
563 .kr(1)
564 .sr(1)
565 .m(6)
566 .n(n)
567 .k(4)
568 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700569 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700570 }
571 }
572
Marat Dukhande06f492020-04-09 00:19:31 -0700573 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700574 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
575 for (size_t k = 1; k < 4; k++) {
576 GemmMicrokernelTester()
577 .mr(6)
578 .nr(8)
579 .kr(1)
580 .sr(1)
581 .m(6)
582 .n(8)
583 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700584 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700585 }
586 }
587
Marat Dukhande06f492020-04-09 00:19:31 -0700588 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700589 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
590 for (size_t k = 1; k < 4; k++) {
591 GemmMicrokernelTester()
592 .mr(6)
593 .nr(8)
594 .kr(1)
595 .sr(1)
596 .m(6)
597 .n(8)
598 .k(k)
599 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700600 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700601 }
602 }
603
Marat Dukhande06f492020-04-09 00:19:31 -0700604 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700605 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
606 for (size_t k = 1; k < 4; k++) {
607 for (uint32_t m = 1; m <= 6; m++) {
608 for (uint32_t n = 1; n <= 8; n++) {
609 GemmMicrokernelTester()
610 .mr(6)
611 .nr(8)
612 .kr(1)
613 .sr(1)
614 .m(m)
615 .n(n)
616 .k(k)
617 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700618 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700619 }
620 }
621 }
622 }
623
Marat Dukhande06f492020-04-09 00:19:31 -0700624 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700625 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
626 for (size_t k = 5; k < 8; k++) {
627 GemmMicrokernelTester()
628 .mr(6)
629 .nr(8)
630 .kr(1)
631 .sr(1)
632 .m(6)
633 .n(8)
634 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700635 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700636 }
637 }
638
Marat Dukhande06f492020-04-09 00:19:31 -0700639 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700640 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
641 for (size_t k = 5; k < 8; k++) {
642 GemmMicrokernelTester()
643 .mr(6)
644 .nr(8)
645 .kr(1)
646 .sr(1)
647 .m(6)
648 .n(8)
649 .k(k)
650 .a_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700651 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700652 }
653 }
654
Marat Dukhande06f492020-04-09 00:19:31 -0700655 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700656 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
657 for (size_t k = 5; k < 8; k++) {
658 for (uint32_t m = 1; m <= 6; m++) {
659 for (uint32_t n = 1; n <= 8; n++) {
660 GemmMicrokernelTester()
661 .mr(6)
662 .nr(8)
663 .kr(1)
664 .sr(1)
665 .m(m)
666 .n(n)
667 .k(k)
668 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700669 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700670 }
671 }
672 }
673 }
674
Marat Dukhande06f492020-04-09 00:19:31 -0700675 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700676 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
677 for (size_t k = 8; k <= 40; k += 4) {
678 GemmMicrokernelTester()
679 .mr(6)
680 .nr(8)
681 .kr(1)
682 .sr(1)
683 .m(6)
684 .n(8)
685 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700686 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700687 }
688 }
689
Marat Dukhande06f492020-04-09 00:19:31 -0700690 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_div_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700691 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
692 for (size_t k = 8; k <= 40; k += 4) {
693 GemmMicrokernelTester()
694 .mr(6)
695 .nr(8)
696 .kr(1)
697 .sr(1)
698 .m(6)
699 .n(8)
700 .k(k)
701 .a_stride(43)
Marat Dukhande06f492020-04-09 00:19:31 -0700702 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700703 }
704 }
705
Marat Dukhande06f492020-04-09 00:19:31 -0700706 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700707 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
708 for (size_t k = 8; k <= 40; k += 4) {
709 for (uint32_t m = 1; m <= 6; m++) {
710 for (uint32_t n = 1; n <= 8; n++) {
711 GemmMicrokernelTester()
712 .mr(6)
713 .nr(8)
714 .kr(1)
715 .sr(1)
716 .m(m)
717 .n(n)
718 .k(k)
719 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700720 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700721 }
722 }
723 }
724 }
725
Marat Dukhande06f492020-04-09 00:19:31 -0700726 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700727 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
728 for (uint32_t n = 9; n < 16; n++) {
729 for (size_t k = 1; k <= 20; k += 5) {
730 GemmMicrokernelTester()
731 .mr(6)
732 .nr(8)
733 .kr(1)
734 .sr(1)
735 .m(6)
736 .n(8)
737 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700738 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700739 }
740 }
741 }
742
Marat Dukhande06f492020-04-09 00:19:31 -0700743 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700744 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
745 for (uint32_t n = 9; n < 16; n++) {
746 for (size_t k = 1; k <= 20; k += 5) {
747 GemmMicrokernelTester()
748 .mr(6)
749 .nr(8)
750 .kr(1)
751 .sr(1)
752 .m(6)
753 .n(8)
754 .k(k)
755 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700756 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700757 }
758 }
759 }
760
Marat Dukhande06f492020-04-09 00:19:31 -0700761 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700762 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
763 for (uint32_t n = 9; n < 16; n++) {
764 for (size_t k = 1; k <= 20; k += 5) {
765 GemmMicrokernelTester()
766 .mr(6)
767 .nr(8)
768 .kr(1)
769 .sr(1)
770 .m(6)
771 .n(n)
772 .k(k)
773 .a_stride(23)
Marat Dukhande06f492020-04-09 00:19:31 -0700774 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700775 }
776 }
777 }
778
Marat Dukhande06f492020-04-09 00:19:31 -0700779 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700780 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
781 for (uint32_t n = 9; n < 16; n++) {
782 for (size_t k = 1; k <= 20; k += 5) {
783 for (uint32_t m = 1; m <= 6; m++) {
784 GemmMicrokernelTester()
785 .mr(6)
786 .nr(8)
787 .kr(1)
788 .sr(1)
789 .m(m)
790 .n(n)
791 .k(k)
792 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700793 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700794 }
795 }
796 }
797 }
798
Marat Dukhande06f492020-04-09 00:19:31 -0700799 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700800 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
801 for (uint32_t n = 16; n <= 24; n += 8) {
802 for (size_t k = 1; k <= 20; k += 5) {
803 GemmMicrokernelTester()
804 .mr(6)
805 .nr(8)
806 .kr(1)
807 .sr(1)
808 .m(6)
809 .n(8)
810 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -0700811 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700812 }
813 }
814 }
815
Marat Dukhande06f492020-04-09 00:19:31 -0700816 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700817 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
818 for (uint32_t n = 16; n <= 24; n += 8) {
819 for (size_t k = 1; k <= 20; k += 5) {
820 GemmMicrokernelTester()
821 .mr(6)
822 .nr(8)
823 .kr(1)
824 .sr(1)
825 .m(6)
826 .n(n)
827 .k(k)
828 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700829 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700830 }
831 }
832 }
833
Marat Dukhande06f492020-04-09 00:19:31 -0700834 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700835 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
836 for (uint32_t n = 16; n <= 24; n += 8) {
837 for (size_t k = 1; k <= 20; k += 5) {
838 GemmMicrokernelTester()
839 .mr(6)
840 .nr(8)
841 .kr(1)
842 .sr(1)
843 .m(6)
844 .n(n)
845 .k(k)
846 .a_stride(23)
Marat Dukhande06f492020-04-09 00:19:31 -0700847 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700848 }
849 }
850 }
851
Marat Dukhande06f492020-04-09 00:19:31 -0700852 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700853 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
854 for (uint32_t n = 16; n <= 24; n += 8) {
855 for (size_t k = 1; k <= 20; k += 5) {
856 for (uint32_t m = 1; m <= 6; m++) {
857 GemmMicrokernelTester()
858 .mr(6)
859 .nr(8)
860 .kr(1)
861 .sr(1)
862 .m(m)
863 .n(n)
864 .k(k)
865 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700866 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700867 }
868 }
869 }
870 }
871
Marat Dukhande06f492020-04-09 00:19:31 -0700872 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700873 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
874 for (size_t k = 1; k <= 20; k += 5) {
875 for (uint32_t m = 1; m <= 6; m++) {
876 for (uint32_t n = 1; n <= 8; n++) {
877 GemmMicrokernelTester()
878 .mr(6)
879 .nr(8)
880 .kr(1)
881 .sr(1)
882 .m(m)
883 .n(n)
884 .k(k)
885 .cm_stride(11)
886 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700887 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700888 }
889 }
890 }
891 }
892
Marat Dukhande06f492020-04-09 00:19:31 -0700893 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700894 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
895 GemmMicrokernelTester()
896 .mr(6)
897 .nr(8)
898 .kr(1)
899 .sr(1)
900 .m(6)
901 .n(8)
902 .k(4)
903 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700904 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700905 }
906
Marat Dukhande06f492020-04-09 00:19:31 -0700907 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700908 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
909 GemmMicrokernelTester()
910 .mr(6)
911 .nr(8)
912 .kr(1)
913 .sr(1)
914 .m(6)
915 .n(8)
916 .k(4)
917 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -0700918 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700919 }
920
Marat Dukhande06f492020-04-09 00:19:31 -0700921 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, strided_cm) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700922 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
923 GemmMicrokernelTester()
924 .mr(6)
925 .nr(8)
926 .kr(1)
927 .sr(1)
928 .m(6)
929 .n(8)
930 .k(4)
931 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700932 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700933 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700934#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700935
936
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700937#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -0700938 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700939 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
940 GemmMicrokernelTester()
941 .mr(8)
942 .nr(8)
943 .kr(1)
944 .sr(1)
945 .m(8)
946 .n(8)
947 .k(4)
Marat Dukhande06f492020-04-09 00:19:31 -0700948 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700949 }
950
Marat Dukhande06f492020-04-09 00:19:31 -0700951 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700952 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
953 GemmMicrokernelTester()
954 .mr(8)
955 .nr(8)
956 .kr(1)
957 .sr(1)
958 .m(8)
959 .n(8)
960 .k(4)
961 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -0700962 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700963 }
964
Marat Dukhande06f492020-04-09 00:19:31 -0700965 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700966 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
967 GemmMicrokernelTester()
968 .mr(8)
969 .nr(8)
970 .kr(1)
971 .sr(1)
972 .m(8)
973 .n(8)
974 .k(4)
975 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -0700976 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700977 }
978
Marat Dukhande06f492020-04-09 00:19:31 -0700979 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700980 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
981 for (uint32_t m = 1; m <= 8; m++) {
982 for (uint32_t n = 1; n <= 8; n++) {
983 GemmMicrokernelTester()
984 .mr(8)
985 .nr(8)
986 .kr(1)
987 .sr(1)
988 .m(m)
989 .n(n)
990 .k(4)
991 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -0700992 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700993 }
994 }
995 }
996
Marat Dukhande06f492020-04-09 00:19:31 -0700997 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700998 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
999 for (uint32_t m = 1; m <= 8; m++) {
1000 GemmMicrokernelTester()
1001 .mr(8)
1002 .nr(8)
1003 .kr(1)
1004 .sr(1)
1005 .m(m)
1006 .n(8)
1007 .k(4)
1008 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001009 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001010 }
1011 }
1012
Marat Dukhande06f492020-04-09 00:19:31 -07001013 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001014 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1015 for (uint32_t n = 1; n <= 8; n++) {
1016 GemmMicrokernelTester()
1017 .mr(8)
1018 .nr(8)
1019 .kr(1)
1020 .sr(1)
1021 .m(8)
1022 .n(n)
1023 .k(4)
1024 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001025 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001026 }
1027 }
1028
Marat Dukhande06f492020-04-09 00:19:31 -07001029 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001030 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1031 for (size_t k = 1; k < 4; k++) {
1032 GemmMicrokernelTester()
1033 .mr(8)
1034 .nr(8)
1035 .kr(1)
1036 .sr(1)
1037 .m(8)
1038 .n(8)
1039 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001040 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001041 }
1042 }
1043
Marat Dukhande06f492020-04-09 00:19:31 -07001044 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001045 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1046 for (size_t k = 1; k < 4; k++) {
1047 GemmMicrokernelTester()
1048 .mr(8)
1049 .nr(8)
1050 .kr(1)
1051 .sr(1)
1052 .m(8)
1053 .n(8)
1054 .k(k)
1055 .a_stride(7)
Marat Dukhande06f492020-04-09 00:19:31 -07001056 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001057 }
1058 }
1059
Marat Dukhande06f492020-04-09 00:19:31 -07001060 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001061 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1062 for (size_t k = 1; k < 4; k++) {
1063 for (uint32_t m = 1; m <= 8; m++) {
1064 for (uint32_t n = 1; n <= 8; n++) {
1065 GemmMicrokernelTester()
1066 .mr(8)
1067 .nr(8)
1068 .kr(1)
1069 .sr(1)
1070 .m(m)
1071 .n(n)
1072 .k(k)
1073 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001074 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001075 }
1076 }
1077 }
1078 }
1079
Marat Dukhande06f492020-04-09 00:19:31 -07001080 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001081 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1082 for (size_t k = 5; k < 8; k++) {
1083 GemmMicrokernelTester()
1084 .mr(8)
1085 .nr(8)
1086 .kr(1)
1087 .sr(1)
1088 .m(8)
1089 .n(8)
1090 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001091 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001092 }
1093 }
1094
Marat Dukhande06f492020-04-09 00:19:31 -07001095 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001096 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1097 for (size_t k = 5; k < 8; k++) {
1098 GemmMicrokernelTester()
1099 .mr(8)
1100 .nr(8)
1101 .kr(1)
1102 .sr(1)
1103 .m(8)
1104 .n(8)
1105 .k(k)
1106 .a_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001107 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001108 }
1109 }
1110
Marat Dukhande06f492020-04-09 00:19:31 -07001111 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001112 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1113 for (size_t k = 5; k < 8; k++) {
1114 for (uint32_t m = 1; m <= 8; m++) {
1115 for (uint32_t n = 1; n <= 8; n++) {
1116 GemmMicrokernelTester()
1117 .mr(8)
1118 .nr(8)
1119 .kr(1)
1120 .sr(1)
1121 .m(m)
1122 .n(n)
1123 .k(k)
1124 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001125 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001126 }
1127 }
1128 }
1129 }
1130
Marat Dukhande06f492020-04-09 00:19:31 -07001131 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001132 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1133 for (size_t k = 8; k <= 40; k += 4) {
1134 GemmMicrokernelTester()
1135 .mr(8)
1136 .nr(8)
1137 .kr(1)
1138 .sr(1)
1139 .m(8)
1140 .n(8)
1141 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001142 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001143 }
1144 }
1145
Marat Dukhande06f492020-04-09 00:19:31 -07001146 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_div_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001147 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1148 for (size_t k = 8; k <= 40; k += 4) {
1149 GemmMicrokernelTester()
1150 .mr(8)
1151 .nr(8)
1152 .kr(1)
1153 .sr(1)
1154 .m(8)
1155 .n(8)
1156 .k(k)
1157 .a_stride(43)
Marat Dukhande06f492020-04-09 00:19:31 -07001158 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001159 }
1160 }
1161
Marat Dukhande06f492020-04-09 00:19:31 -07001162 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001163 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1164 for (size_t k = 8; k <= 40; k += 4) {
1165 for (uint32_t m = 1; m <= 8; m++) {
1166 for (uint32_t n = 1; n <= 8; n++) {
1167 GemmMicrokernelTester()
1168 .mr(8)
1169 .nr(8)
1170 .kr(1)
1171 .sr(1)
1172 .m(m)
1173 .n(n)
1174 .k(k)
1175 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001176 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001177 }
1178 }
1179 }
1180 }
1181
Marat Dukhande06f492020-04-09 00:19:31 -07001182 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001183 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1184 for (uint32_t n = 9; n < 16; n++) {
1185 for (size_t k = 1; k <= 20; k += 5) {
1186 GemmMicrokernelTester()
1187 .mr(8)
1188 .nr(8)
1189 .kr(1)
1190 .sr(1)
1191 .m(8)
1192 .n(8)
1193 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001194 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001195 }
1196 }
1197 }
1198
Marat Dukhande06f492020-04-09 00:19:31 -07001199 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001200 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1201 for (uint32_t n = 9; n < 16; n++) {
1202 for (size_t k = 1; k <= 20; k += 5) {
1203 GemmMicrokernelTester()
1204 .mr(8)
1205 .nr(8)
1206 .kr(1)
1207 .sr(1)
1208 .m(8)
1209 .n(8)
1210 .k(k)
1211 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001212 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001213 }
1214 }
1215 }
1216
Marat Dukhande06f492020-04-09 00:19:31 -07001217 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001218 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1219 for (uint32_t n = 9; n < 16; n++) {
1220 for (size_t k = 1; k <= 20; k += 5) {
1221 GemmMicrokernelTester()
1222 .mr(8)
1223 .nr(8)
1224 .kr(1)
1225 .sr(1)
1226 .m(8)
1227 .n(n)
1228 .k(k)
1229 .a_stride(23)
Marat Dukhande06f492020-04-09 00:19:31 -07001230 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001231 }
1232 }
1233 }
1234
Marat Dukhande06f492020-04-09 00:19:31 -07001235 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001236 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1237 for (uint32_t n = 9; n < 16; n++) {
1238 for (size_t k = 1; k <= 20; k += 5) {
1239 for (uint32_t m = 1; m <= 8; m++) {
1240 GemmMicrokernelTester()
1241 .mr(8)
1242 .nr(8)
1243 .kr(1)
1244 .sr(1)
1245 .m(m)
1246 .n(n)
1247 .k(k)
1248 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001249 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001250 }
1251 }
1252 }
1253 }
1254
Marat Dukhande06f492020-04-09 00:19:31 -07001255 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001256 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1257 for (uint32_t n = 16; n <= 24; n += 8) {
1258 for (size_t k = 1; k <= 20; k += 5) {
1259 GemmMicrokernelTester()
1260 .mr(8)
1261 .nr(8)
1262 .kr(1)
1263 .sr(1)
1264 .m(8)
1265 .n(8)
1266 .k(k)
Marat Dukhande06f492020-04-09 00:19:31 -07001267 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001268 }
1269 }
1270 }
1271
Marat Dukhande06f492020-04-09 00:19:31 -07001272 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001273 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1274 for (uint32_t n = 16; n <= 24; n += 8) {
1275 for (size_t k = 1; k <= 20; k += 5) {
1276 GemmMicrokernelTester()
1277 .mr(8)
1278 .nr(8)
1279 .kr(1)
1280 .sr(1)
1281 .m(8)
1282 .n(n)
1283 .k(k)
1284 .cn_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001285 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001286 }
1287 }
1288 }
1289
Marat Dukhande06f492020-04-09 00:19:31 -07001290 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001291 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1292 for (uint32_t n = 16; n <= 24; n += 8) {
1293 for (size_t k = 1; k <= 20; k += 5) {
1294 GemmMicrokernelTester()
1295 .mr(8)
1296 .nr(8)
1297 .kr(1)
1298 .sr(1)
1299 .m(8)
1300 .n(n)
1301 .k(k)
1302 .a_stride(23)
Marat Dukhande06f492020-04-09 00:19:31 -07001303 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001304 }
1305 }
1306 }
1307
Marat Dukhande06f492020-04-09 00:19:31 -07001308 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001309 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1310 for (uint32_t n = 16; n <= 24; n += 8) {
1311 for (size_t k = 1; k <= 20; k += 5) {
1312 for (uint32_t m = 1; m <= 8; m++) {
1313 GemmMicrokernelTester()
1314 .mr(8)
1315 .nr(8)
1316 .kr(1)
1317 .sr(1)
1318 .m(m)
1319 .n(n)
1320 .k(k)
1321 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001322 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001323 }
1324 }
1325 }
1326 }
1327
Marat Dukhande06f492020-04-09 00:19:31 -07001328 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001329 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1330 for (size_t k = 1; k <= 20; k += 5) {
1331 for (uint32_t m = 1; m <= 8; m++) {
1332 for (uint32_t n = 1; n <= 8; n++) {
1333 GemmMicrokernelTester()
1334 .mr(8)
1335 .nr(8)
1336 .kr(1)
1337 .sr(1)
1338 .m(m)
1339 .n(n)
1340 .k(k)
1341 .cm_stride(11)
1342 .iterations(1)
Marat Dukhande06f492020-04-09 00:19:31 -07001343 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001344 }
1345 }
1346 }
1347 }
1348
Marat Dukhande06f492020-04-09 00:19:31 -07001349 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001350 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1351 GemmMicrokernelTester()
1352 .mr(8)
1353 .nr(8)
1354 .kr(1)
1355 .sr(1)
1356 .m(8)
1357 .n(8)
1358 .k(4)
1359 .qmin(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001360 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001361 }
1362
Marat Dukhande06f492020-04-09 00:19:31 -07001363 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001364 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1365 GemmMicrokernelTester()
1366 .mr(8)
1367 .nr(8)
1368 .kr(1)
1369 .sr(1)
1370 .m(8)
1371 .n(8)
1372 .k(4)
1373 .qmax(128)
Marat Dukhande06f492020-04-09 00:19:31 -07001374 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001375 }
1376
Marat Dukhande06f492020-04-09 00:19:31 -07001377 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, strided_cm) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001378 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1379 GemmMicrokernelTester()
1380 .mr(8)
1381 .nr(8)
1382 .kr(1)
1383 .sr(1)
1384 .m(8)
1385 .n(8)
1386 .k(4)
1387 .cm_stride(11)
Marat Dukhande06f492020-04-09 00:19:31 -07001388 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001389 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001390#endif // XNN_ARCH_ARM64
Frank Barchard683f5592020-04-10 00:48:26 -07001391
1392
1393#if XNN_ARCH_ARM64
Frank Barchard36b76b62020-04-10 12:39:17 -07001394 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2) {
1395 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1396 GemmMicrokernelTester()
1397 .mr(1)
1398 .nr(16)
1399 .kr(1)
1400 .sr(1)
1401 .m(1)
1402 .n(16)
1403 .k(2)
1404 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1405 }
1406
1407 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, strided_cn) {
1408 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1409 GemmMicrokernelTester()
1410 .mr(1)
1411 .nr(16)
1412 .kr(1)
1413 .sr(1)
1414 .m(1)
1415 .n(16)
1416 .k(2)
1417 .cn_stride(19)
1418 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1419 }
1420
1421 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_strided_a) {
1422 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1423 GemmMicrokernelTester()
1424 .mr(1)
1425 .nr(16)
1426 .kr(1)
1427 .sr(1)
1428 .m(1)
1429 .n(16)
1430 .k(2)
1431 .a_stride(5)
1432 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1433 }
1434
1435 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile) {
1436 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1437 for (uint32_t m = 1; m <= 1; m++) {
1438 for (uint32_t n = 1; n <= 16; n++) {
1439 GemmMicrokernelTester()
1440 .mr(1)
1441 .nr(16)
1442 .kr(1)
1443 .sr(1)
1444 .m(m)
1445 .n(n)
1446 .k(2)
1447 .iterations(1)
1448 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1449 }
1450 }
1451 }
1452
1453 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_m) {
1454 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1455 for (uint32_t m = 1; m <= 1; m++) {
1456 GemmMicrokernelTester()
1457 .mr(1)
1458 .nr(16)
1459 .kr(1)
1460 .sr(1)
1461 .m(m)
1462 .n(16)
1463 .k(2)
1464 .iterations(1)
1465 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1466 }
1467 }
1468
1469 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_n) {
1470 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1471 for (uint32_t n = 1; n <= 16; n++) {
1472 GemmMicrokernelTester()
1473 .mr(1)
1474 .nr(16)
1475 .kr(1)
1476 .sr(1)
1477 .m(1)
1478 .n(n)
1479 .k(2)
1480 .iterations(1)
1481 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1482 }
1483 }
1484
1485 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2) {
1486 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1487 for (size_t k = 1; k < 2; k++) {
1488 GemmMicrokernelTester()
1489 .mr(1)
1490 .nr(16)
1491 .kr(1)
1492 .sr(1)
1493 .m(1)
1494 .n(16)
1495 .k(k)
1496 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1497 }
1498 }
1499
1500 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_strided_a) {
1501 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1502 for (size_t k = 1; k < 2; k++) {
1503 GemmMicrokernelTester()
1504 .mr(1)
1505 .nr(16)
1506 .kr(1)
1507 .sr(1)
1508 .m(1)
1509 .n(16)
1510 .k(k)
1511 .a_stride(5)
1512 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1513 }
1514 }
1515
1516 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_subtile) {
1517 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1518 for (size_t k = 1; k < 2; k++) {
1519 for (uint32_t m = 1; m <= 1; m++) {
1520 for (uint32_t n = 1; n <= 16; n++) {
1521 GemmMicrokernelTester()
1522 .mr(1)
1523 .nr(16)
1524 .kr(1)
1525 .sr(1)
1526 .m(m)
1527 .n(n)
1528 .k(k)
1529 .iterations(1)
1530 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1531 }
1532 }
1533 }
1534 }
1535
1536 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2) {
1537 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1538 for (size_t k = 3; k < 4; k++) {
1539 GemmMicrokernelTester()
1540 .mr(1)
1541 .nr(16)
1542 .kr(1)
1543 .sr(1)
1544 .m(1)
1545 .n(16)
1546 .k(k)
1547 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1548 }
1549 }
1550
1551 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_strided_a) {
1552 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1553 for (size_t k = 3; k < 4; k++) {
1554 GemmMicrokernelTester()
1555 .mr(1)
1556 .nr(16)
1557 .kr(1)
1558 .sr(1)
1559 .m(1)
1560 .n(16)
1561 .k(k)
1562 .a_stride(7)
1563 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1564 }
1565 }
1566
1567 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_subtile) {
1568 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1569 for (size_t k = 3; k < 4; k++) {
1570 for (uint32_t m = 1; m <= 1; m++) {
1571 for (uint32_t n = 1; n <= 16; n++) {
1572 GemmMicrokernelTester()
1573 .mr(1)
1574 .nr(16)
1575 .kr(1)
1576 .sr(1)
1577 .m(m)
1578 .n(n)
1579 .k(k)
1580 .iterations(1)
1581 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1582 }
1583 }
1584 }
1585 }
1586
1587 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_div_2) {
1588 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1589 for (size_t k = 4; k <= 20; k += 2) {
1590 GemmMicrokernelTester()
1591 .mr(1)
1592 .nr(16)
1593 .kr(1)
1594 .sr(1)
1595 .m(1)
1596 .n(16)
1597 .k(k)
1598 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1599 }
1600 }
1601
1602 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_strided_a) {
1603 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1604 for (size_t k = 4; k <= 20; k += 2) {
1605 GemmMicrokernelTester()
1606 .mr(1)
1607 .nr(16)
1608 .kr(1)
1609 .sr(1)
1610 .m(1)
1611 .n(16)
1612 .k(k)
1613 .a_stride(23)
1614 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1615 }
1616 }
1617
1618 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_subtile) {
1619 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1620 for (size_t k = 4; k <= 20; k += 2) {
1621 for (uint32_t m = 1; m <= 1; m++) {
1622 for (uint32_t n = 1; n <= 16; n++) {
1623 GemmMicrokernelTester()
1624 .mr(1)
1625 .nr(16)
1626 .kr(1)
1627 .sr(1)
1628 .m(m)
1629 .n(n)
1630 .k(k)
1631 .iterations(1)
1632 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1633 }
1634 }
1635 }
1636 }
1637
1638 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16) {
1639 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1640 for (uint32_t n = 17; n < 32; n++) {
1641 for (size_t k = 1; k <= 10; k += 3) {
1642 GemmMicrokernelTester()
1643 .mr(1)
1644 .nr(16)
1645 .kr(1)
1646 .sr(1)
1647 .m(1)
1648 .n(16)
1649 .k(k)
1650 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1651 }
1652 }
1653 }
1654
1655 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_cn) {
1656 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1657 for (uint32_t n = 17; n < 32; n++) {
1658 for (size_t k = 1; k <= 10; k += 3) {
1659 GemmMicrokernelTester()
1660 .mr(1)
1661 .nr(16)
1662 .kr(1)
1663 .sr(1)
1664 .m(1)
1665 .n(16)
1666 .k(k)
1667 .cn_stride(19)
1668 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1669 }
1670 }
1671 }
1672
1673 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_a) {
1674 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1675 for (uint32_t n = 17; n < 32; n++) {
1676 for (size_t k = 1; k <= 10; k += 3) {
1677 GemmMicrokernelTester()
1678 .mr(1)
1679 .nr(16)
1680 .kr(1)
1681 .sr(1)
1682 .m(1)
1683 .n(n)
1684 .k(k)
1685 .a_stride(13)
1686 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1687 }
1688 }
1689 }
1690
1691 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_subtile) {
1692 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1693 for (uint32_t n = 17; n < 32; n++) {
1694 for (size_t k = 1; k <= 10; k += 3) {
1695 for (uint32_t m = 1; m <= 1; m++) {
1696 GemmMicrokernelTester()
1697 .mr(1)
1698 .nr(16)
1699 .kr(1)
1700 .sr(1)
1701 .m(m)
1702 .n(n)
1703 .k(k)
1704 .iterations(1)
1705 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1706 }
1707 }
1708 }
1709 }
1710
1711 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_div_16) {
1712 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1713 for (uint32_t n = 32; n <= 48; n += 16) {
1714 for (size_t k = 1; k <= 10; k += 3) {
1715 GemmMicrokernelTester()
1716 .mr(1)
1717 .nr(16)
1718 .kr(1)
1719 .sr(1)
1720 .m(1)
1721 .n(16)
1722 .k(k)
1723 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1724 }
1725 }
1726 }
1727
1728 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_cn) {
1729 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1730 for (uint32_t n = 32; n <= 48; n += 16) {
1731 for (size_t k = 1; k <= 10; k += 3) {
1732 GemmMicrokernelTester()
1733 .mr(1)
1734 .nr(16)
1735 .kr(1)
1736 .sr(1)
1737 .m(1)
1738 .n(n)
1739 .k(k)
1740 .cn_stride(19)
1741 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1742 }
1743 }
1744 }
1745
1746 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_a) {
1747 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1748 for (uint32_t n = 32; n <= 48; n += 16) {
1749 for (size_t k = 1; k <= 10; k += 3) {
1750 GemmMicrokernelTester()
1751 .mr(1)
1752 .nr(16)
1753 .kr(1)
1754 .sr(1)
1755 .m(1)
1756 .n(n)
1757 .k(k)
1758 .a_stride(13)
1759 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1760 }
1761 }
1762 }
1763
1764 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_subtile) {
1765 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1766 for (uint32_t n = 32; n <= 48; n += 16) {
1767 for (size_t k = 1; k <= 10; k += 3) {
1768 for (uint32_t m = 1; m <= 1; m++) {
1769 GemmMicrokernelTester()
1770 .mr(1)
1771 .nr(16)
1772 .kr(1)
1773 .sr(1)
1774 .m(m)
1775 .n(n)
1776 .k(k)
1777 .iterations(1)
1778 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1779 }
1780 }
1781 }
1782 }
1783
1784 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, strided_cm_subtile) {
1785 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1786 for (size_t k = 1; k <= 10; k += 3) {
1787 for (uint32_t m = 1; m <= 1; m++) {
1788 for (uint32_t n = 1; n <= 16; n++) {
1789 GemmMicrokernelTester()
1790 .mr(1)
1791 .nr(16)
1792 .kr(1)
1793 .sr(1)
1794 .m(m)
1795 .n(n)
1796 .k(k)
1797 .cm_stride(19)
1798 .iterations(1)
1799 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1800 }
1801 }
1802 }
1803 }
1804
1805 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, qmin) {
1806 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1807 GemmMicrokernelTester()
1808 .mr(1)
1809 .nr(16)
1810 .kr(1)
1811 .sr(1)
1812 .m(1)
1813 .n(16)
1814 .k(2)
1815 .qmin(128)
1816 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1817 }
1818
1819 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, qmax) {
1820 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1821 GemmMicrokernelTester()
1822 .mr(1)
1823 .nr(16)
1824 .kr(1)
1825 .sr(1)
1826 .m(1)
1827 .n(16)
1828 .k(2)
1829 .qmax(128)
1830 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1831 }
1832
1833 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, strided_cm) {
1834 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1835 GemmMicrokernelTester()
1836 .mr(1)
1837 .nr(16)
1838 .kr(1)
1839 .sr(1)
1840 .m(1)
1841 .n(16)
1842 .k(2)
1843 .cm_stride(19)
1844 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32);
1845 }
1846#endif // XNN_ARCH_ARM64
1847
1848
1849#if XNN_ARCH_ARM64
Frank Barchard683f5592020-04-10 00:48:26 -07001850 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2) {
1851 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1852 GemmMicrokernelTester()
1853 .mr(4)
1854 .nr(16)
1855 .kr(1)
1856 .sr(1)
1857 .m(4)
1858 .n(16)
1859 .k(2)
1860 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
1861 }
1862
1863 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, strided_cn) {
1864 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1865 GemmMicrokernelTester()
1866 .mr(4)
1867 .nr(16)
1868 .kr(1)
1869 .sr(1)
1870 .m(4)
1871 .n(16)
1872 .k(2)
1873 .cn_stride(19)
1874 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
1875 }
1876
1877 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_strided_a) {
1878 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1879 GemmMicrokernelTester()
1880 .mr(4)
1881 .nr(16)
1882 .kr(1)
1883 .sr(1)
1884 .m(4)
1885 .n(16)
1886 .k(2)
1887 .a_stride(5)
1888 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
1889 }
1890
1891 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile) {
1892 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1893 for (uint32_t m = 1; m <= 4; m++) {
1894 for (uint32_t n = 1; n <= 16; n++) {
1895 GemmMicrokernelTester()
1896 .mr(4)
1897 .nr(16)
1898 .kr(1)
1899 .sr(1)
1900 .m(m)
1901 .n(n)
1902 .k(2)
1903 .iterations(1)
1904 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
1905 }
1906 }
1907 }
1908
1909 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_m) {
1910 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1911 for (uint32_t m = 1; m <= 4; m++) {
1912 GemmMicrokernelTester()
1913 .mr(4)
1914 .nr(16)
1915 .kr(1)
1916 .sr(1)
1917 .m(m)
1918 .n(16)
1919 .k(2)
1920 .iterations(1)
1921 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
1922 }
1923 }
1924
1925 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_n) {
1926 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1927 for (uint32_t n = 1; n <= 16; n++) {
1928 GemmMicrokernelTester()
1929 .mr(4)
1930 .nr(16)
1931 .kr(1)
1932 .sr(1)
1933 .m(4)
1934 .n(n)
1935 .k(2)
1936 .iterations(1)
1937 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
1938 }
1939 }
1940
1941 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2) {
1942 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1943 for (size_t k = 1; k < 2; k++) {
1944 GemmMicrokernelTester()
1945 .mr(4)
1946 .nr(16)
1947 .kr(1)
1948 .sr(1)
1949 .m(4)
1950 .n(16)
1951 .k(k)
1952 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
1953 }
1954 }
1955
1956 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_strided_a) {
1957 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1958 for (size_t k = 1; k < 2; k++) {
1959 GemmMicrokernelTester()
1960 .mr(4)
1961 .nr(16)
1962 .kr(1)
1963 .sr(1)
1964 .m(4)
1965 .n(16)
1966 .k(k)
1967 .a_stride(5)
1968 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
1969 }
1970 }
1971
1972 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_subtile) {
1973 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1974 for (size_t k = 1; k < 2; k++) {
1975 for (uint32_t m = 1; m <= 4; m++) {
1976 for (uint32_t n = 1; n <= 16; n++) {
1977 GemmMicrokernelTester()
1978 .mr(4)
1979 .nr(16)
1980 .kr(1)
1981 .sr(1)
1982 .m(m)
1983 .n(n)
1984 .k(k)
1985 .iterations(1)
1986 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
1987 }
1988 }
1989 }
1990 }
1991
1992 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2) {
1993 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1994 for (size_t k = 3; k < 4; k++) {
1995 GemmMicrokernelTester()
1996 .mr(4)
1997 .nr(16)
1998 .kr(1)
1999 .sr(1)
2000 .m(4)
2001 .n(16)
2002 .k(k)
2003 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2004 }
2005 }
2006
2007 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_strided_a) {
2008 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2009 for (size_t k = 3; k < 4; k++) {
2010 GemmMicrokernelTester()
2011 .mr(4)
2012 .nr(16)
2013 .kr(1)
2014 .sr(1)
2015 .m(4)
2016 .n(16)
2017 .k(k)
2018 .a_stride(7)
2019 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2020 }
2021 }
2022
2023 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_subtile) {
2024 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2025 for (size_t k = 3; k < 4; k++) {
2026 for (uint32_t m = 1; m <= 4; m++) {
2027 for (uint32_t n = 1; n <= 16; n++) {
2028 GemmMicrokernelTester()
2029 .mr(4)
2030 .nr(16)
2031 .kr(1)
2032 .sr(1)
2033 .m(m)
2034 .n(n)
2035 .k(k)
2036 .iterations(1)
2037 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2038 }
2039 }
2040 }
2041 }
2042
2043 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_div_2) {
2044 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2045 for (size_t k = 4; k <= 20; k += 2) {
2046 GemmMicrokernelTester()
2047 .mr(4)
2048 .nr(16)
2049 .kr(1)
2050 .sr(1)
2051 .m(4)
2052 .n(16)
2053 .k(k)
2054 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2055 }
2056 }
2057
2058 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_strided_a) {
2059 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2060 for (size_t k = 4; k <= 20; k += 2) {
2061 GemmMicrokernelTester()
2062 .mr(4)
2063 .nr(16)
2064 .kr(1)
2065 .sr(1)
2066 .m(4)
2067 .n(16)
2068 .k(k)
2069 .a_stride(23)
2070 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2071 }
2072 }
2073
2074 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_subtile) {
2075 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2076 for (size_t k = 4; k <= 20; k += 2) {
2077 for (uint32_t m = 1; m <= 4; m++) {
2078 for (uint32_t n = 1; n <= 16; n++) {
2079 GemmMicrokernelTester()
2080 .mr(4)
2081 .nr(16)
2082 .kr(1)
2083 .sr(1)
2084 .m(m)
2085 .n(n)
2086 .k(k)
2087 .iterations(1)
2088 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2089 }
2090 }
2091 }
2092 }
2093
2094 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16) {
2095 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2096 for (uint32_t n = 17; n < 32; n++) {
2097 for (size_t k = 1; k <= 10; k += 3) {
2098 GemmMicrokernelTester()
2099 .mr(4)
2100 .nr(16)
2101 .kr(1)
2102 .sr(1)
2103 .m(4)
2104 .n(16)
2105 .k(k)
2106 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2107 }
2108 }
2109 }
2110
2111 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_cn) {
2112 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2113 for (uint32_t n = 17; n < 32; n++) {
2114 for (size_t k = 1; k <= 10; k += 3) {
2115 GemmMicrokernelTester()
2116 .mr(4)
2117 .nr(16)
2118 .kr(1)
2119 .sr(1)
2120 .m(4)
2121 .n(16)
2122 .k(k)
2123 .cn_stride(19)
2124 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2125 }
2126 }
2127 }
2128
2129 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_a) {
2130 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2131 for (uint32_t n = 17; n < 32; n++) {
2132 for (size_t k = 1; k <= 10; k += 3) {
2133 GemmMicrokernelTester()
2134 .mr(4)
2135 .nr(16)
2136 .kr(1)
2137 .sr(1)
2138 .m(4)
2139 .n(n)
2140 .k(k)
2141 .a_stride(13)
2142 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2143 }
2144 }
2145 }
2146
2147 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_subtile) {
2148 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2149 for (uint32_t n = 17; n < 32; n++) {
2150 for (size_t k = 1; k <= 10; k += 3) {
2151 for (uint32_t m = 1; m <= 4; m++) {
2152 GemmMicrokernelTester()
2153 .mr(4)
2154 .nr(16)
2155 .kr(1)
2156 .sr(1)
2157 .m(m)
2158 .n(n)
2159 .k(k)
2160 .iterations(1)
2161 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2162 }
2163 }
2164 }
2165 }
2166
2167 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_div_16) {
2168 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2169 for (uint32_t n = 32; n <= 48; n += 16) {
2170 for (size_t k = 1; k <= 10; k += 3) {
2171 GemmMicrokernelTester()
2172 .mr(4)
2173 .nr(16)
2174 .kr(1)
2175 .sr(1)
2176 .m(4)
2177 .n(16)
2178 .k(k)
2179 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2180 }
2181 }
2182 }
2183
2184 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_cn) {
2185 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2186 for (uint32_t n = 32; n <= 48; n += 16) {
2187 for (size_t k = 1; k <= 10; k += 3) {
2188 GemmMicrokernelTester()
2189 .mr(4)
2190 .nr(16)
2191 .kr(1)
2192 .sr(1)
2193 .m(4)
2194 .n(n)
2195 .k(k)
2196 .cn_stride(19)
2197 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2198 }
2199 }
2200 }
2201
2202 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_a) {
2203 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2204 for (uint32_t n = 32; n <= 48; n += 16) {
2205 for (size_t k = 1; k <= 10; k += 3) {
2206 GemmMicrokernelTester()
2207 .mr(4)
2208 .nr(16)
2209 .kr(1)
2210 .sr(1)
2211 .m(4)
2212 .n(n)
2213 .k(k)
2214 .a_stride(13)
2215 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2216 }
2217 }
2218 }
2219
2220 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_subtile) {
2221 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2222 for (uint32_t n = 32; n <= 48; n += 16) {
2223 for (size_t k = 1; k <= 10; k += 3) {
2224 for (uint32_t m = 1; m <= 4; m++) {
2225 GemmMicrokernelTester()
2226 .mr(4)
2227 .nr(16)
2228 .kr(1)
2229 .sr(1)
2230 .m(m)
2231 .n(n)
2232 .k(k)
2233 .iterations(1)
2234 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2235 }
2236 }
2237 }
2238 }
2239
2240 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, strided_cm_subtile) {
2241 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2242 for (size_t k = 1; k <= 10; k += 3) {
2243 for (uint32_t m = 1; m <= 4; m++) {
2244 for (uint32_t n = 1; n <= 16; n++) {
2245 GemmMicrokernelTester()
2246 .mr(4)
2247 .nr(16)
2248 .kr(1)
2249 .sr(1)
2250 .m(m)
2251 .n(n)
2252 .k(k)
2253 .cm_stride(19)
2254 .iterations(1)
2255 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2256 }
2257 }
2258 }
2259 }
2260
2261 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, qmin) {
2262 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2263 GemmMicrokernelTester()
2264 .mr(4)
2265 .nr(16)
2266 .kr(1)
2267 .sr(1)
2268 .m(4)
2269 .n(16)
2270 .k(2)
2271 .qmin(128)
2272 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2273 }
2274
2275 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, qmax) {
2276 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2277 GemmMicrokernelTester()
2278 .mr(4)
2279 .nr(16)
2280 .kr(1)
2281 .sr(1)
2282 .m(4)
2283 .n(16)
2284 .k(2)
2285 .qmax(128)
2286 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2287 }
2288
2289 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, strided_cm) {
2290 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2291 GemmMicrokernelTester()
2292 .mr(4)
2293 .nr(16)
2294 .kr(1)
2295 .sr(1)
2296 .m(4)
2297 .n(16)
2298 .k(2)
2299 .cm_stride(19)
2300 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32);
2301 }
2302#endif // XNN_ARCH_ARM64
2303
2304
2305#if XNN_ARCH_ARM64
2306 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2) {
2307 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2308 GemmMicrokernelTester()
2309 .mr(6)
2310 .nr(16)
2311 .kr(1)
2312 .sr(1)
2313 .m(6)
2314 .n(16)
2315 .k(2)
2316 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2317 }
2318
2319 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, strided_cn) {
2320 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2321 GemmMicrokernelTester()
2322 .mr(6)
2323 .nr(16)
2324 .kr(1)
2325 .sr(1)
2326 .m(6)
2327 .n(16)
2328 .k(2)
2329 .cn_stride(19)
2330 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2331 }
2332
2333 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_strided_a) {
2334 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2335 GemmMicrokernelTester()
2336 .mr(6)
2337 .nr(16)
2338 .kr(1)
2339 .sr(1)
2340 .m(6)
2341 .n(16)
2342 .k(2)
2343 .a_stride(5)
2344 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2345 }
2346
2347 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile) {
2348 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2349 for (uint32_t m = 1; m <= 6; m++) {
2350 for (uint32_t n = 1; n <= 16; n++) {
2351 GemmMicrokernelTester()
2352 .mr(6)
2353 .nr(16)
2354 .kr(1)
2355 .sr(1)
2356 .m(m)
2357 .n(n)
2358 .k(2)
2359 .iterations(1)
2360 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2361 }
2362 }
2363 }
2364
2365 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_m) {
2366 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2367 for (uint32_t m = 1; m <= 6; m++) {
2368 GemmMicrokernelTester()
2369 .mr(6)
2370 .nr(16)
2371 .kr(1)
2372 .sr(1)
2373 .m(m)
2374 .n(16)
2375 .k(2)
2376 .iterations(1)
2377 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2378 }
2379 }
2380
2381 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_n) {
2382 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2383 for (uint32_t n = 1; n <= 16; n++) {
2384 GemmMicrokernelTester()
2385 .mr(6)
2386 .nr(16)
2387 .kr(1)
2388 .sr(1)
2389 .m(6)
2390 .n(n)
2391 .k(2)
2392 .iterations(1)
2393 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2394 }
2395 }
2396
2397 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2) {
2398 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2399 for (size_t k = 1; k < 2; k++) {
2400 GemmMicrokernelTester()
2401 .mr(6)
2402 .nr(16)
2403 .kr(1)
2404 .sr(1)
2405 .m(6)
2406 .n(16)
2407 .k(k)
2408 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2409 }
2410 }
2411
2412 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_strided_a) {
2413 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2414 for (size_t k = 1; k < 2; k++) {
2415 GemmMicrokernelTester()
2416 .mr(6)
2417 .nr(16)
2418 .kr(1)
2419 .sr(1)
2420 .m(6)
2421 .n(16)
2422 .k(k)
2423 .a_stride(5)
2424 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2425 }
2426 }
2427
2428 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_subtile) {
2429 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2430 for (size_t k = 1; k < 2; k++) {
2431 for (uint32_t m = 1; m <= 6; m++) {
2432 for (uint32_t n = 1; n <= 16; n++) {
2433 GemmMicrokernelTester()
2434 .mr(6)
2435 .nr(16)
2436 .kr(1)
2437 .sr(1)
2438 .m(m)
2439 .n(n)
2440 .k(k)
2441 .iterations(1)
2442 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2443 }
2444 }
2445 }
2446 }
2447
2448 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2) {
2449 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2450 for (size_t k = 3; k < 4; k++) {
2451 GemmMicrokernelTester()
2452 .mr(6)
2453 .nr(16)
2454 .kr(1)
2455 .sr(1)
2456 .m(6)
2457 .n(16)
2458 .k(k)
2459 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2460 }
2461 }
2462
2463 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_strided_a) {
2464 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2465 for (size_t k = 3; k < 4; k++) {
2466 GemmMicrokernelTester()
2467 .mr(6)
2468 .nr(16)
2469 .kr(1)
2470 .sr(1)
2471 .m(6)
2472 .n(16)
2473 .k(k)
2474 .a_stride(7)
2475 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2476 }
2477 }
2478
2479 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_subtile) {
2480 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2481 for (size_t k = 3; k < 4; k++) {
2482 for (uint32_t m = 1; m <= 6; m++) {
2483 for (uint32_t n = 1; n <= 16; n++) {
2484 GemmMicrokernelTester()
2485 .mr(6)
2486 .nr(16)
2487 .kr(1)
2488 .sr(1)
2489 .m(m)
2490 .n(n)
2491 .k(k)
2492 .iterations(1)
2493 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2494 }
2495 }
2496 }
2497 }
2498
2499 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_div_2) {
2500 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2501 for (size_t k = 4; k <= 20; k += 2) {
2502 GemmMicrokernelTester()
2503 .mr(6)
2504 .nr(16)
2505 .kr(1)
2506 .sr(1)
2507 .m(6)
2508 .n(16)
2509 .k(k)
2510 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2511 }
2512 }
2513
2514 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_strided_a) {
2515 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2516 for (size_t k = 4; k <= 20; k += 2) {
2517 GemmMicrokernelTester()
2518 .mr(6)
2519 .nr(16)
2520 .kr(1)
2521 .sr(1)
2522 .m(6)
2523 .n(16)
2524 .k(k)
2525 .a_stride(23)
2526 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2527 }
2528 }
2529
2530 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_subtile) {
2531 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2532 for (size_t k = 4; k <= 20; k += 2) {
2533 for (uint32_t m = 1; m <= 6; m++) {
2534 for (uint32_t n = 1; n <= 16; n++) {
2535 GemmMicrokernelTester()
2536 .mr(6)
2537 .nr(16)
2538 .kr(1)
2539 .sr(1)
2540 .m(m)
2541 .n(n)
2542 .k(k)
2543 .iterations(1)
2544 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2545 }
2546 }
2547 }
2548 }
2549
2550 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16) {
2551 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2552 for (uint32_t n = 17; n < 32; n++) {
2553 for (size_t k = 1; k <= 10; k += 3) {
2554 GemmMicrokernelTester()
2555 .mr(6)
2556 .nr(16)
2557 .kr(1)
2558 .sr(1)
2559 .m(6)
2560 .n(16)
2561 .k(k)
2562 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2563 }
2564 }
2565 }
2566
2567 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_cn) {
2568 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2569 for (uint32_t n = 17; n < 32; n++) {
2570 for (size_t k = 1; k <= 10; k += 3) {
2571 GemmMicrokernelTester()
2572 .mr(6)
2573 .nr(16)
2574 .kr(1)
2575 .sr(1)
2576 .m(6)
2577 .n(16)
2578 .k(k)
2579 .cn_stride(19)
2580 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2581 }
2582 }
2583 }
2584
2585 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_a) {
2586 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2587 for (uint32_t n = 17; n < 32; n++) {
2588 for (size_t k = 1; k <= 10; k += 3) {
2589 GemmMicrokernelTester()
2590 .mr(6)
2591 .nr(16)
2592 .kr(1)
2593 .sr(1)
2594 .m(6)
2595 .n(n)
2596 .k(k)
2597 .a_stride(13)
2598 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2599 }
2600 }
2601 }
2602
2603 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_subtile) {
2604 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2605 for (uint32_t n = 17; n < 32; n++) {
2606 for (size_t k = 1; k <= 10; k += 3) {
2607 for (uint32_t m = 1; m <= 6; m++) {
2608 GemmMicrokernelTester()
2609 .mr(6)
2610 .nr(16)
2611 .kr(1)
2612 .sr(1)
2613 .m(m)
2614 .n(n)
2615 .k(k)
2616 .iterations(1)
2617 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2618 }
2619 }
2620 }
2621 }
2622
2623 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_div_16) {
2624 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2625 for (uint32_t n = 32; n <= 48; n += 16) {
2626 for (size_t k = 1; k <= 10; k += 3) {
2627 GemmMicrokernelTester()
2628 .mr(6)
2629 .nr(16)
2630 .kr(1)
2631 .sr(1)
2632 .m(6)
2633 .n(16)
2634 .k(k)
2635 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2636 }
2637 }
2638 }
2639
2640 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_cn) {
2641 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2642 for (uint32_t n = 32; n <= 48; n += 16) {
2643 for (size_t k = 1; k <= 10; k += 3) {
2644 GemmMicrokernelTester()
2645 .mr(6)
2646 .nr(16)
2647 .kr(1)
2648 .sr(1)
2649 .m(6)
2650 .n(n)
2651 .k(k)
2652 .cn_stride(19)
2653 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2654 }
2655 }
2656 }
2657
2658 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_a) {
2659 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2660 for (uint32_t n = 32; n <= 48; n += 16) {
2661 for (size_t k = 1; k <= 10; k += 3) {
2662 GemmMicrokernelTester()
2663 .mr(6)
2664 .nr(16)
2665 .kr(1)
2666 .sr(1)
2667 .m(6)
2668 .n(n)
2669 .k(k)
2670 .a_stride(13)
2671 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2672 }
2673 }
2674 }
2675
2676 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_subtile) {
2677 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2678 for (uint32_t n = 32; n <= 48; n += 16) {
2679 for (size_t k = 1; k <= 10; k += 3) {
2680 for (uint32_t m = 1; m <= 6; m++) {
2681 GemmMicrokernelTester()
2682 .mr(6)
2683 .nr(16)
2684 .kr(1)
2685 .sr(1)
2686 .m(m)
2687 .n(n)
2688 .k(k)
2689 .iterations(1)
2690 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2691 }
2692 }
2693 }
2694 }
2695
2696 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, strided_cm_subtile) {
2697 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2698 for (size_t k = 1; k <= 10; k += 3) {
2699 for (uint32_t m = 1; m <= 6; m++) {
2700 for (uint32_t n = 1; n <= 16; n++) {
2701 GemmMicrokernelTester()
2702 .mr(6)
2703 .nr(16)
2704 .kr(1)
2705 .sr(1)
2706 .m(m)
2707 .n(n)
2708 .k(k)
2709 .cm_stride(19)
2710 .iterations(1)
2711 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2712 }
2713 }
2714 }
2715 }
2716
2717 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, qmin) {
2718 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2719 GemmMicrokernelTester()
2720 .mr(6)
2721 .nr(16)
2722 .kr(1)
2723 .sr(1)
2724 .m(6)
2725 .n(16)
2726 .k(2)
2727 .qmin(128)
2728 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2729 }
2730
2731 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, qmax) {
2732 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2733 GemmMicrokernelTester()
2734 .mr(6)
2735 .nr(16)
2736 .kr(1)
2737 .sr(1)
2738 .m(6)
2739 .n(16)
2740 .k(2)
2741 .qmax(128)
2742 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2743 }
2744
2745 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, strided_cm) {
2746 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2747 GemmMicrokernelTester()
2748 .mr(6)
2749 .nr(16)
2750 .kr(1)
2751 .sr(1)
2752 .m(6)
2753 .n(16)
2754 .k(2)
2755 .cm_stride(19)
2756 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32);
2757 }
2758#endif // XNN_ARCH_ARM64