blob: 356e72fe41ec8c0ac7533dec765d54eb741688cd [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f16-gemm-minmax.yaml
// Generator: tools/generate-gemm-test.py
12
13
XNNPACK Teamb455b122019-09-27 18:10:33 -070014#include <gtest/gtest.h>
15
Frank Barchard447aa7b2021-12-28 14:11:40 -080016#include <xnnpack/allocator.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
XNNPACK Teamb455b122019-09-27 18:10:33 -070020#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070023#include "gemm-microkernel-tester.h"
24
25
Frank Barcharde4d3f762021-12-23 15:31:43 -080026#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard97374612021-06-07 11:51:07 -070027 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_eq_2) {
28 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
29 GemmMicrokernelTester()
30 .mr(6)
31 .nr(16)
32 .kr(1)
33 .sr(1)
34 .m(6)
35 .n(16)
36 .k(2)
Marat Dukhanc4302c22022-01-06 19:27:03 -080037 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -070038 }
39
40 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, strided_cn) {
41 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
42 GemmMicrokernelTester()
43 .mr(6)
44 .nr(16)
45 .kr(1)
46 .sr(1)
47 .m(6)
48 .n(16)
49 .k(2)
50 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -080051 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -070052 }
53
54 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_eq_2_strided_a) {
55 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
56 GemmMicrokernelTester()
57 .mr(6)
58 .nr(16)
59 .kr(1)
60 .sr(1)
61 .m(6)
62 .n(16)
63 .k(2)
64 .a_stride(5)
Marat Dukhanc4302c22022-01-06 19:27:03 -080065 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -070066 }
67
68 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_eq_2_subtile) {
69 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -080070 for (uint32_t n = 1; n <= 16; n++) {
71 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard97374612021-06-07 11:51:07 -070072 GemmMicrokernelTester()
73 .mr(6)
74 .nr(16)
75 .kr(1)
76 .sr(1)
77 .m(m)
78 .n(n)
79 .k(2)
80 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -080081 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -070082 }
83 }
84 }
85
86 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_eq_2_subtile_m) {
87 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
88 for (uint32_t m = 1; m <= 6; m++) {
89 GemmMicrokernelTester()
90 .mr(6)
91 .nr(16)
92 .kr(1)
93 .sr(1)
94 .m(m)
95 .n(16)
96 .k(2)
97 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -080098 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -070099 }
100 }
101
102 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_eq_2_subtile_n) {
103 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
104 for (uint32_t n = 1; n <= 16; n++) {
105 GemmMicrokernelTester()
106 .mr(6)
107 .nr(16)
108 .kr(1)
109 .sr(1)
110 .m(6)
111 .n(n)
112 .k(2)
113 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800114 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700115 }
116 }
117
118 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_lt_2) {
119 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
120 for (size_t k = 1; k < 2; k++) {
121 GemmMicrokernelTester()
122 .mr(6)
123 .nr(16)
124 .kr(1)
125 .sr(1)
126 .m(6)
127 .n(16)
128 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800129 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700130 }
131 }
132
133 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_lt_2_strided_a) {
134 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
135 for (size_t k = 1; k < 2; k++) {
136 GemmMicrokernelTester()
137 .mr(6)
138 .nr(16)
139 .kr(1)
140 .sr(1)
141 .m(6)
142 .n(16)
143 .k(k)
144 .a_stride(5)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800145 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700146 }
147 }
148
149 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_lt_2_subtile) {
150 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
151 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800152 for (uint32_t n = 1; n <= 16; n++) {
153 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard97374612021-06-07 11:51:07 -0700154 GemmMicrokernelTester()
155 .mr(6)
156 .nr(16)
157 .kr(1)
158 .sr(1)
159 .m(m)
160 .n(n)
161 .k(k)
162 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800163 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700164 }
165 }
166 }
167 }
168
169 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_gt_2) {
170 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
171 for (size_t k = 3; k < 4; k++) {
172 GemmMicrokernelTester()
173 .mr(6)
174 .nr(16)
175 .kr(1)
176 .sr(1)
177 .m(6)
178 .n(16)
179 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800180 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700181 }
182 }
183
184 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_gt_2_strided_a) {
185 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
186 for (size_t k = 3; k < 4; k++) {
187 GemmMicrokernelTester()
188 .mr(6)
189 .nr(16)
190 .kr(1)
191 .sr(1)
192 .m(6)
193 .n(16)
194 .k(k)
195 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800196 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700197 }
198 }
199
200 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_gt_2_subtile) {
201 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
202 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800203 for (uint32_t n = 1; n <= 16; n++) {
204 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard97374612021-06-07 11:51:07 -0700205 GemmMicrokernelTester()
206 .mr(6)
207 .nr(16)
208 .kr(1)
209 .sr(1)
210 .m(m)
211 .n(n)
212 .k(k)
213 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800214 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700215 }
216 }
217 }
218 }
219
220 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_div_2) {
221 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
222 for (size_t k = 4; k <= 20; k += 2) {
223 GemmMicrokernelTester()
224 .mr(6)
225 .nr(16)
226 .kr(1)
227 .sr(1)
228 .m(6)
229 .n(16)
230 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800231 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700232 }
233 }
234
235 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_div_2_strided_a) {
236 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
237 for (size_t k = 4; k <= 20; k += 2) {
238 GemmMicrokernelTester()
239 .mr(6)
240 .nr(16)
241 .kr(1)
242 .sr(1)
243 .m(6)
244 .n(16)
245 .k(k)
246 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800247 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700248 }
249 }
250
251 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, k_div_2_subtile) {
252 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
253 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800254 for (uint32_t n = 1; n <= 16; n++) {
255 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard97374612021-06-07 11:51:07 -0700256 GemmMicrokernelTester()
257 .mr(6)
258 .nr(16)
259 .kr(1)
260 .sr(1)
261 .m(m)
262 .n(n)
263 .k(k)
264 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800265 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700266 }
267 }
268 }
269 }
270
271 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, n_gt_16) {
272 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
273 for (uint32_t n = 17; n < 32; n++) {
274 for (size_t k = 1; k <= 10; k += 3) {
275 GemmMicrokernelTester()
276 .mr(6)
277 .nr(16)
278 .kr(1)
279 .sr(1)
280 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800281 .n(n)
Frank Barchard97374612021-06-07 11:51:07 -0700282 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800283 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700284 }
285 }
286 }
287
288 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, n_gt_16_strided_cn) {
289 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
290 for (uint32_t n = 17; n < 32; n++) {
291 for (size_t k = 1; k <= 10; k += 3) {
292 GemmMicrokernelTester()
293 .mr(6)
294 .nr(16)
295 .kr(1)
296 .sr(1)
297 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800298 .n(n)
Frank Barchard97374612021-06-07 11:51:07 -0700299 .k(k)
300 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800301 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700302 }
303 }
304 }
305
306 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, n_gt_16_strided_a) {
307 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
308 for (uint32_t n = 17; n < 32; n++) {
309 for (size_t k = 1; k <= 10; k += 3) {
310 GemmMicrokernelTester()
311 .mr(6)
312 .nr(16)
313 .kr(1)
314 .sr(1)
315 .m(6)
316 .n(n)
317 .k(k)
318 .a_stride(13)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800319 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700320 }
321 }
322 }
323
324 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, n_gt_16_subtile) {
325 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
326 for (uint32_t n = 17; n < 32; n++) {
327 for (size_t k = 1; k <= 10; k += 3) {
328 for (uint32_t m = 1; m <= 6; m++) {
329 GemmMicrokernelTester()
330 .mr(6)
331 .nr(16)
332 .kr(1)
333 .sr(1)
334 .m(m)
335 .n(n)
336 .k(k)
337 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800338 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700339 }
340 }
341 }
342 }
343
344 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, n_div_16) {
345 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
346 for (uint32_t n = 32; n <= 48; n += 16) {
347 for (size_t k = 1; k <= 10; k += 3) {
348 GemmMicrokernelTester()
349 .mr(6)
350 .nr(16)
351 .kr(1)
352 .sr(1)
353 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800354 .n(n)
Frank Barchard97374612021-06-07 11:51:07 -0700355 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800356 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700357 }
358 }
359 }
360
361 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, n_div_16_strided_cn) {
362 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
363 for (uint32_t n = 32; n <= 48; n += 16) {
364 for (size_t k = 1; k <= 10; k += 3) {
365 GemmMicrokernelTester()
366 .mr(6)
367 .nr(16)
368 .kr(1)
369 .sr(1)
370 .m(6)
371 .n(n)
372 .k(k)
373 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800374 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700375 }
376 }
377 }
378
379 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, n_div_16_strided_a) {
380 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
381 for (uint32_t n = 32; n <= 48; n += 16) {
382 for (size_t k = 1; k <= 10; k += 3) {
383 GemmMicrokernelTester()
384 .mr(6)
385 .nr(16)
386 .kr(1)
387 .sr(1)
388 .m(6)
389 .n(n)
390 .k(k)
391 .a_stride(13)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800392 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700393 }
394 }
395 }
396
397 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, n_div_16_subtile) {
398 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
399 for (uint32_t n = 32; n <= 48; n += 16) {
400 for (size_t k = 1; k <= 10; k += 3) {
401 for (uint32_t m = 1; m <= 6; m++) {
402 GemmMicrokernelTester()
403 .mr(6)
404 .nr(16)
405 .kr(1)
406 .sr(1)
407 .m(m)
408 .n(n)
409 .k(k)
410 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800411 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700412 }
413 }
414 }
415 }
416
417 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, strided_cm_subtile) {
418 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
419 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800420 for (uint32_t n = 1; n <= 16; n++) {
421 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard97374612021-06-07 11:51:07 -0700422 GemmMicrokernelTester()
423 .mr(6)
424 .nr(16)
425 .kr(1)
426 .sr(1)
427 .m(m)
428 .n(n)
429 .k(k)
430 .cm_stride(19)
431 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800432 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700433 }
434 }
435 }
436 }
437
438 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, qmin) {
439 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
440 GemmMicrokernelTester()
441 .mr(6)
442 .nr(16)
443 .kr(1)
444 .sr(1)
445 .m(6)
446 .n(16)
447 .k(2)
448 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800449 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700450 }
451
452 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, qmax) {
453 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
454 GemmMicrokernelTester()
455 .mr(6)
456 .nr(16)
457 .kr(1)
458 .sr(1)
459 .m(6)
460 .n(16)
461 .k(2)
462 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800463 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700464 }
465
466 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A75, strided_cm) {
467 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
468 GemmMicrokernelTester()
469 .mr(6)
470 .nr(16)
471 .kr(1)
472 .sr(1)
473 .m(6)
474 .n(16)
475 .k(2)
476 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800477 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard97374612021-06-07 11:51:07 -0700478 }
Frank Barcharde4d3f762021-12-23 15:31:43 -0800479#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard97374612021-06-07 11:51:07 -0700480
481
Frank Barcharde4d3f762021-12-23 15:31:43 -0800482#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard80fc5f42021-06-07 10:43:16 -0700483 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_eq_2) {
484 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
485 GemmMicrokernelTester()
486 .mr(6)
487 .nr(16)
488 .kr(1)
489 .sr(1)
490 .m(6)
491 .n(16)
492 .k(2)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800493 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700494 }
495
496 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, strided_cn) {
497 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
498 GemmMicrokernelTester()
499 .mr(6)
500 .nr(16)
501 .kr(1)
502 .sr(1)
503 .m(6)
504 .n(16)
505 .k(2)
506 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800507 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700508 }
509
510 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_eq_2_strided_a) {
511 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
512 GemmMicrokernelTester()
513 .mr(6)
514 .nr(16)
515 .kr(1)
516 .sr(1)
517 .m(6)
518 .n(16)
519 .k(2)
520 .a_stride(5)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800521 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700522 }
523
524 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_eq_2_subtile) {
525 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800526 for (uint32_t n = 1; n <= 16; n++) {
527 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard80fc5f42021-06-07 10:43:16 -0700528 GemmMicrokernelTester()
529 .mr(6)
530 .nr(16)
531 .kr(1)
532 .sr(1)
533 .m(m)
534 .n(n)
535 .k(2)
536 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800537 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700538 }
539 }
540 }
541
542 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_eq_2_subtile_m) {
543 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
544 for (uint32_t m = 1; m <= 6; m++) {
545 GemmMicrokernelTester()
546 .mr(6)
547 .nr(16)
548 .kr(1)
549 .sr(1)
550 .m(m)
551 .n(16)
552 .k(2)
553 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800554 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700555 }
556 }
557
558 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_eq_2_subtile_n) {
559 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
560 for (uint32_t n = 1; n <= 16; n++) {
561 GemmMicrokernelTester()
562 .mr(6)
563 .nr(16)
564 .kr(1)
565 .sr(1)
566 .m(6)
567 .n(n)
568 .k(2)
569 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800570 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700571 }
572 }
573
574 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_lt_2) {
575 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
576 for (size_t k = 1; k < 2; k++) {
577 GemmMicrokernelTester()
578 .mr(6)
579 .nr(16)
580 .kr(1)
581 .sr(1)
582 .m(6)
583 .n(16)
584 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800585 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700586 }
587 }
588
589 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_lt_2_strided_a) {
590 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
591 for (size_t k = 1; k < 2; k++) {
592 GemmMicrokernelTester()
593 .mr(6)
594 .nr(16)
595 .kr(1)
596 .sr(1)
597 .m(6)
598 .n(16)
599 .k(k)
600 .a_stride(5)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800601 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700602 }
603 }
604
605 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_lt_2_subtile) {
606 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
607 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800608 for (uint32_t n = 1; n <= 16; n++) {
609 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard80fc5f42021-06-07 10:43:16 -0700610 GemmMicrokernelTester()
611 .mr(6)
612 .nr(16)
613 .kr(1)
614 .sr(1)
615 .m(m)
616 .n(n)
617 .k(k)
618 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800619 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700620 }
621 }
622 }
623 }
624
625 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_gt_2) {
626 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
627 for (size_t k = 3; k < 4; k++) {
628 GemmMicrokernelTester()
629 .mr(6)
630 .nr(16)
631 .kr(1)
632 .sr(1)
633 .m(6)
634 .n(16)
635 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800636 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700637 }
638 }
639
640 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_gt_2_strided_a) {
641 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
642 for (size_t k = 3; k < 4; k++) {
643 GemmMicrokernelTester()
644 .mr(6)
645 .nr(16)
646 .kr(1)
647 .sr(1)
648 .m(6)
649 .n(16)
650 .k(k)
651 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800652 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700653 }
654 }
655
656 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_gt_2_subtile) {
657 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
658 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800659 for (uint32_t n = 1; n <= 16; n++) {
660 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard80fc5f42021-06-07 10:43:16 -0700661 GemmMicrokernelTester()
662 .mr(6)
663 .nr(16)
664 .kr(1)
665 .sr(1)
666 .m(m)
667 .n(n)
668 .k(k)
669 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800670 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700671 }
672 }
673 }
674 }
675
676 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_div_2) {
677 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
678 for (size_t k = 4; k <= 20; k += 2) {
679 GemmMicrokernelTester()
680 .mr(6)
681 .nr(16)
682 .kr(1)
683 .sr(1)
684 .m(6)
685 .n(16)
686 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800687 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700688 }
689 }
690
691 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_div_2_strided_a) {
692 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
693 for (size_t k = 4; k <= 20; k += 2) {
694 GemmMicrokernelTester()
695 .mr(6)
696 .nr(16)
697 .kr(1)
698 .sr(1)
699 .m(6)
700 .n(16)
701 .k(k)
702 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800703 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700704 }
705 }
706
707 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, k_div_2_subtile) {
708 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
709 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800710 for (uint32_t n = 1; n <= 16; n++) {
711 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard80fc5f42021-06-07 10:43:16 -0700712 GemmMicrokernelTester()
713 .mr(6)
714 .nr(16)
715 .kr(1)
716 .sr(1)
717 .m(m)
718 .n(n)
719 .k(k)
720 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800721 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700722 }
723 }
724 }
725 }
726
727 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, n_gt_16) {
728 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
729 for (uint32_t n = 17; n < 32; n++) {
730 for (size_t k = 1; k <= 10; k += 3) {
731 GemmMicrokernelTester()
732 .mr(6)
733 .nr(16)
734 .kr(1)
735 .sr(1)
736 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800737 .n(n)
Frank Barchard80fc5f42021-06-07 10:43:16 -0700738 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800739 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700740 }
741 }
742 }
743
744 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, n_gt_16_strided_cn) {
745 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
746 for (uint32_t n = 17; n < 32; n++) {
747 for (size_t k = 1; k <= 10; k += 3) {
748 GemmMicrokernelTester()
749 .mr(6)
750 .nr(16)
751 .kr(1)
752 .sr(1)
753 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800754 .n(n)
Frank Barchard80fc5f42021-06-07 10:43:16 -0700755 .k(k)
756 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800757 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700758 }
759 }
760 }
761
762 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, n_gt_16_strided_a) {
763 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
764 for (uint32_t n = 17; n < 32; n++) {
765 for (size_t k = 1; k <= 10; k += 3) {
766 GemmMicrokernelTester()
767 .mr(6)
768 .nr(16)
769 .kr(1)
770 .sr(1)
771 .m(6)
772 .n(n)
773 .k(k)
774 .a_stride(13)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800775 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700776 }
777 }
778 }
779
780 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, n_gt_16_subtile) {
781 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
782 for (uint32_t n = 17; n < 32; n++) {
783 for (size_t k = 1; k <= 10; k += 3) {
784 for (uint32_t m = 1; m <= 6; m++) {
785 GemmMicrokernelTester()
786 .mr(6)
787 .nr(16)
788 .kr(1)
789 .sr(1)
790 .m(m)
791 .n(n)
792 .k(k)
793 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800794 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700795 }
796 }
797 }
798 }
799
800 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, n_div_16) {
801 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
802 for (uint32_t n = 32; n <= 48; n += 16) {
803 for (size_t k = 1; k <= 10; k += 3) {
804 GemmMicrokernelTester()
805 .mr(6)
806 .nr(16)
807 .kr(1)
808 .sr(1)
809 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800810 .n(n)
Frank Barchard80fc5f42021-06-07 10:43:16 -0700811 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800812 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700813 }
814 }
815 }
816
817 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, n_div_16_strided_cn) {
818 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
819 for (uint32_t n = 32; n <= 48; n += 16) {
820 for (size_t k = 1; k <= 10; k += 3) {
821 GemmMicrokernelTester()
822 .mr(6)
823 .nr(16)
824 .kr(1)
825 .sr(1)
826 .m(6)
827 .n(n)
828 .k(k)
829 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800830 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700831 }
832 }
833 }
834
835 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, n_div_16_strided_a) {
836 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
837 for (uint32_t n = 32; n <= 48; n += 16) {
838 for (size_t k = 1; k <= 10; k += 3) {
839 GemmMicrokernelTester()
840 .mr(6)
841 .nr(16)
842 .kr(1)
843 .sr(1)
844 .m(6)
845 .n(n)
846 .k(k)
847 .a_stride(13)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800848 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700849 }
850 }
851 }
852
853 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, n_div_16_subtile) {
854 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
855 for (uint32_t n = 32; n <= 48; n += 16) {
856 for (size_t k = 1; k <= 10; k += 3) {
857 for (uint32_t m = 1; m <= 6; m++) {
858 GemmMicrokernelTester()
859 .mr(6)
860 .nr(16)
861 .kr(1)
862 .sr(1)
863 .m(m)
864 .n(n)
865 .k(k)
866 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800867 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700868 }
869 }
870 }
871 }
872
873 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, strided_cm_subtile) {
874 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
875 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800876 for (uint32_t n = 1; n <= 16; n++) {
877 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard80fc5f42021-06-07 10:43:16 -0700878 GemmMicrokernelTester()
879 .mr(6)
880 .nr(16)
881 .kr(1)
882 .sr(1)
883 .m(m)
884 .n(n)
885 .k(k)
886 .cm_stride(19)
887 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800888 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700889 }
890 }
891 }
892 }
893
894 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, qmin) {
895 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
896 GemmMicrokernelTester()
897 .mr(6)
898 .nr(16)
899 .kr(1)
900 .sr(1)
901 .m(6)
902 .n(16)
903 .k(2)
904 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800905 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700906 }
907
908 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, qmax) {
909 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
910 GemmMicrokernelTester()
911 .mr(6)
912 .nr(16)
913 .kr(1)
914 .sr(1)
915 .m(6)
916 .n(16)
917 .k(2)
918 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800919 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700920 }
921
922 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_CORTEX_A55, strided_cm) {
923 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
924 GemmMicrokernelTester()
925 .mr(6)
926 .nr(16)
927 .kr(1)
928 .sr(1)
929 .m(6)
930 .n(16)
931 .k(2)
932 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800933 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard80fc5f42021-06-07 10:43:16 -0700934 }
Frank Barcharde4d3f762021-12-23 15:31:43 -0800935#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard80fc5f42021-06-07 10:43:16 -0700936
937
938#if XNN_ARCH_ARM64
Frank Barchard1f4e4612020-04-13 18:24:54 -0700939 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_eq_4) {
940 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
941 GemmMicrokernelTester()
942 .mr(1)
943 .nr(8)
944 .kr(1)
945 .sr(1)
946 .m(1)
947 .n(8)
948 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800949 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -0700950 }
951
952 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, strided_cn) {
953 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
954 GemmMicrokernelTester()
955 .mr(1)
956 .nr(8)
957 .kr(1)
958 .sr(1)
959 .m(1)
960 .n(8)
961 .k(4)
962 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800963 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -0700964 }
965
966 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
967 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
968 GemmMicrokernelTester()
969 .mr(1)
970 .nr(8)
971 .kr(1)
972 .sr(1)
973 .m(1)
974 .n(8)
975 .k(4)
976 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800977 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -0700978 }
979
980 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
981 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800982 for (uint32_t n = 1; n <= 8; n++) {
983 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1f4e4612020-04-13 18:24:54 -0700984 GemmMicrokernelTester()
985 .mr(1)
986 .nr(8)
987 .kr(1)
988 .sr(1)
989 .m(m)
990 .n(n)
991 .k(4)
992 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800993 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -0700994 }
995 }
996 }
997
998 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
999 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1000 for (uint32_t m = 1; m <= 1; m++) {
1001 GemmMicrokernelTester()
1002 .mr(1)
1003 .nr(8)
1004 .kr(1)
1005 .sr(1)
1006 .m(m)
1007 .n(8)
1008 .k(4)
1009 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001010 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001011 }
1012 }
1013
1014 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
1015 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1016 for (uint32_t n = 1; n <= 8; n++) {
1017 GemmMicrokernelTester()
1018 .mr(1)
1019 .nr(8)
1020 .kr(1)
1021 .sr(1)
1022 .m(1)
1023 .n(n)
1024 .k(4)
1025 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001026 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001027 }
1028 }
1029
1030 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_lt_4) {
1031 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1032 for (size_t k = 1; k < 4; k++) {
1033 GemmMicrokernelTester()
1034 .mr(1)
1035 .nr(8)
1036 .kr(1)
1037 .sr(1)
1038 .m(1)
1039 .n(8)
1040 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001041 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001042 }
1043 }
1044
1045 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
1046 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1047 for (size_t k = 1; k < 4; k++) {
1048 GemmMicrokernelTester()
1049 .mr(1)
1050 .nr(8)
1051 .kr(1)
1052 .sr(1)
1053 .m(1)
1054 .n(8)
1055 .k(k)
1056 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001057 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001058 }
1059 }
1060
1061 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
1062 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1063 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001064 for (uint32_t n = 1; n <= 8; n++) {
1065 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1f4e4612020-04-13 18:24:54 -07001066 GemmMicrokernelTester()
1067 .mr(1)
1068 .nr(8)
1069 .kr(1)
1070 .sr(1)
1071 .m(m)
1072 .n(n)
1073 .k(k)
1074 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001075 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001076 }
1077 }
1078 }
1079 }
1080
1081 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_gt_4) {
1082 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1083 for (size_t k = 5; k < 8; k++) {
1084 GemmMicrokernelTester()
1085 .mr(1)
1086 .nr(8)
1087 .kr(1)
1088 .sr(1)
1089 .m(1)
1090 .n(8)
1091 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001092 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001093 }
1094 }
1095
1096 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
1097 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1098 for (size_t k = 5; k < 8; k++) {
1099 GemmMicrokernelTester()
1100 .mr(1)
1101 .nr(8)
1102 .kr(1)
1103 .sr(1)
1104 .m(1)
1105 .n(8)
1106 .k(k)
1107 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001108 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001109 }
1110 }
1111
1112 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
1113 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1114 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001115 for (uint32_t n = 1; n <= 8; n++) {
1116 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1f4e4612020-04-13 18:24:54 -07001117 GemmMicrokernelTester()
1118 .mr(1)
1119 .nr(8)
1120 .kr(1)
1121 .sr(1)
1122 .m(m)
1123 .n(n)
1124 .k(k)
1125 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001126 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001127 }
1128 }
1129 }
1130 }
1131
1132 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_div_4) {
1133 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1134 for (size_t k = 8; k <= 40; k += 4) {
1135 GemmMicrokernelTester()
1136 .mr(1)
1137 .nr(8)
1138 .kr(1)
1139 .sr(1)
1140 .m(1)
1141 .n(8)
1142 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001143 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001144 }
1145 }
1146
1147 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_div_4_strided_a) {
1148 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1149 for (size_t k = 8; k <= 40; k += 4) {
1150 GemmMicrokernelTester()
1151 .mr(1)
1152 .nr(8)
1153 .kr(1)
1154 .sr(1)
1155 .m(1)
1156 .n(8)
1157 .k(k)
1158 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001159 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001160 }
1161 }
1162
1163 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
1164 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1165 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001166 for (uint32_t n = 1; n <= 8; n++) {
1167 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1f4e4612020-04-13 18:24:54 -07001168 GemmMicrokernelTester()
1169 .mr(1)
1170 .nr(8)
1171 .kr(1)
1172 .sr(1)
1173 .m(m)
1174 .n(n)
1175 .k(k)
1176 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001177 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001178 }
1179 }
1180 }
1181 }
1182
1183 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_gt_8) {
1184 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1185 for (uint32_t n = 9; n < 16; n++) {
1186 for (size_t k = 1; k <= 20; k += 5) {
1187 GemmMicrokernelTester()
1188 .mr(1)
1189 .nr(8)
1190 .kr(1)
1191 .sr(1)
1192 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001193 .n(n)
Frank Barchard1f4e4612020-04-13 18:24:54 -07001194 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001195 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001196 }
1197 }
1198 }
1199
1200 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
1201 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1202 for (uint32_t n = 9; n < 16; n++) {
1203 for (size_t k = 1; k <= 20; k += 5) {
1204 GemmMicrokernelTester()
1205 .mr(1)
1206 .nr(8)
1207 .kr(1)
1208 .sr(1)
1209 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001210 .n(n)
Frank Barchard1f4e4612020-04-13 18:24:54 -07001211 .k(k)
1212 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001213 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001214 }
1215 }
1216 }
1217
1218 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) {
1219 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1220 for (uint32_t n = 9; n < 16; n++) {
1221 for (size_t k = 1; k <= 20; k += 5) {
1222 GemmMicrokernelTester()
1223 .mr(1)
1224 .nr(8)
1225 .kr(1)
1226 .sr(1)
1227 .m(1)
1228 .n(n)
1229 .k(k)
1230 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001231 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001232 }
1233 }
1234 }
1235
1236 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
1237 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1238 for (uint32_t n = 9; n < 16; n++) {
1239 for (size_t k = 1; k <= 20; k += 5) {
1240 for (uint32_t m = 1; m <= 1; m++) {
1241 GemmMicrokernelTester()
1242 .mr(1)
1243 .nr(8)
1244 .kr(1)
1245 .sr(1)
1246 .m(m)
1247 .n(n)
1248 .k(k)
1249 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001250 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001251 }
1252 }
1253 }
1254 }
1255
1256 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_div_8) {
1257 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1258 for (uint32_t n = 16; n <= 24; n += 8) {
1259 for (size_t k = 1; k <= 20; k += 5) {
1260 GemmMicrokernelTester()
1261 .mr(1)
1262 .nr(8)
1263 .kr(1)
1264 .sr(1)
1265 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001266 .n(n)
Frank Barchard1f4e4612020-04-13 18:24:54 -07001267 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001268 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001269 }
1270 }
1271 }
1272
1273 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
1274 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1275 for (uint32_t n = 16; n <= 24; n += 8) {
1276 for (size_t k = 1; k <= 20; k += 5) {
1277 GemmMicrokernelTester()
1278 .mr(1)
1279 .nr(8)
1280 .kr(1)
1281 .sr(1)
1282 .m(1)
1283 .n(n)
1284 .k(k)
1285 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001286 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001287 }
1288 }
1289 }
1290
1291 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_div_8_strided_a) {
1292 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1293 for (uint32_t n = 16; n <= 24; n += 8) {
1294 for (size_t k = 1; k <= 20; k += 5) {
1295 GemmMicrokernelTester()
1296 .mr(1)
1297 .nr(8)
1298 .kr(1)
1299 .sr(1)
1300 .m(1)
1301 .n(n)
1302 .k(k)
1303 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001304 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001305 }
1306 }
1307 }
1308
1309 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
1310 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1311 for (uint32_t n = 16; n <= 24; n += 8) {
1312 for (size_t k = 1; k <= 20; k += 5) {
1313 for (uint32_t m = 1; m <= 1; m++) {
1314 GemmMicrokernelTester()
1315 .mr(1)
1316 .nr(8)
1317 .kr(1)
1318 .sr(1)
1319 .m(m)
1320 .n(n)
1321 .k(k)
1322 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001323 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001324 }
1325 }
1326 }
1327 }
1328
1329 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
1330 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1331 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001332 for (uint32_t n = 1; n <= 8; n++) {
1333 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1f4e4612020-04-13 18:24:54 -07001334 GemmMicrokernelTester()
1335 .mr(1)
1336 .nr(8)
1337 .kr(1)
1338 .sr(1)
1339 .m(m)
1340 .n(n)
1341 .k(k)
1342 .cm_stride(11)
1343 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001344 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001345 }
1346 }
1347 }
1348 }
1349
1350 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, qmin) {
1351 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1352 GemmMicrokernelTester()
1353 .mr(1)
1354 .nr(8)
1355 .kr(1)
1356 .sr(1)
1357 .m(1)
1358 .n(8)
1359 .k(4)
1360 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001361 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001362 }
1363
1364 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, qmax) {
1365 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1366 GemmMicrokernelTester()
1367 .mr(1)
1368 .nr(8)
1369 .kr(1)
1370 .sr(1)
1371 .m(1)
1372 .n(8)
1373 .k(4)
1374 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001375 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001376 }
1377
1378 TEST(F16_GEMM_MINMAX_1X8__NEONFP16ARITH_LD64, strided_cm) {
1379 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1380 GemmMicrokernelTester()
1381 .mr(1)
1382 .nr(8)
1383 .kr(1)
1384 .sr(1)
1385 .m(1)
1386 .n(8)
1387 .k(4)
1388 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001389 .Test(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard1f4e4612020-04-13 18:24:54 -07001390 }
1391#endif // XNN_ARCH_ARM64
1392
1393
1394#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07001395 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001396 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1397 GemmMicrokernelTester()
1398 .mr(4)
1399 .nr(8)
1400 .kr(1)
1401 .sr(1)
1402 .m(4)
1403 .n(8)
1404 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001405 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001406 }
1407
Marat Dukhande06f492020-04-09 00:19:31 -07001408 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001409 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1410 GemmMicrokernelTester()
1411 .mr(4)
1412 .nr(8)
1413 .kr(1)
1414 .sr(1)
1415 .m(4)
1416 .n(8)
1417 .k(4)
1418 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001419 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001420 }
1421
Marat Dukhande06f492020-04-09 00:19:31 -07001422 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001423 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1424 GemmMicrokernelTester()
1425 .mr(4)
1426 .nr(8)
1427 .kr(1)
1428 .sr(1)
1429 .m(4)
1430 .n(8)
1431 .k(4)
1432 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001433 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001434 }
1435
Marat Dukhande06f492020-04-09 00:19:31 -07001436 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001437 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001438 for (uint32_t n = 1; n <= 8; n++) {
1439 for (uint32_t m = 1; m <= 4; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001440 GemmMicrokernelTester()
1441 .mr(4)
1442 .nr(8)
1443 .kr(1)
1444 .sr(1)
1445 .m(m)
1446 .n(n)
1447 .k(4)
1448 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001449 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001450 }
1451 }
1452 }
1453
Marat Dukhande06f492020-04-09 00:19:31 -07001454 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001455 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1456 for (uint32_t m = 1; m <= 4; m++) {
1457 GemmMicrokernelTester()
1458 .mr(4)
1459 .nr(8)
1460 .kr(1)
1461 .sr(1)
1462 .m(m)
1463 .n(8)
1464 .k(4)
1465 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001466 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001467 }
1468 }
1469
Marat Dukhande06f492020-04-09 00:19:31 -07001470 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001471 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1472 for (uint32_t n = 1; n <= 8; n++) {
1473 GemmMicrokernelTester()
1474 .mr(4)
1475 .nr(8)
1476 .kr(1)
1477 .sr(1)
1478 .m(4)
1479 .n(n)
1480 .k(4)
1481 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001482 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001483 }
1484 }
1485
Marat Dukhande06f492020-04-09 00:19:31 -07001486 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001487 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1488 for (size_t k = 1; k < 4; k++) {
1489 GemmMicrokernelTester()
1490 .mr(4)
1491 .nr(8)
1492 .kr(1)
1493 .sr(1)
1494 .m(4)
1495 .n(8)
1496 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001497 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001498 }
1499 }
1500
Marat Dukhande06f492020-04-09 00:19:31 -07001501 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001502 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1503 for (size_t k = 1; k < 4; k++) {
1504 GemmMicrokernelTester()
1505 .mr(4)
1506 .nr(8)
1507 .kr(1)
1508 .sr(1)
1509 .m(4)
1510 .n(8)
1511 .k(k)
1512 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001513 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001514 }
1515 }
1516
Marat Dukhande06f492020-04-09 00:19:31 -07001517 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001518 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1519 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001520 for (uint32_t n = 1; n <= 8; n++) {
1521 for (uint32_t m = 1; m <= 4; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001522 GemmMicrokernelTester()
1523 .mr(4)
1524 .nr(8)
1525 .kr(1)
1526 .sr(1)
1527 .m(m)
1528 .n(n)
1529 .k(k)
1530 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001531 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001532 }
1533 }
1534 }
1535 }
1536
Marat Dukhande06f492020-04-09 00:19:31 -07001537 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001538 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1539 for (size_t k = 5; k < 8; k++) {
1540 GemmMicrokernelTester()
1541 .mr(4)
1542 .nr(8)
1543 .kr(1)
1544 .sr(1)
1545 .m(4)
1546 .n(8)
1547 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001548 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001549 }
1550 }
1551
Marat Dukhande06f492020-04-09 00:19:31 -07001552 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001553 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1554 for (size_t k = 5; k < 8; k++) {
1555 GemmMicrokernelTester()
1556 .mr(4)
1557 .nr(8)
1558 .kr(1)
1559 .sr(1)
1560 .m(4)
1561 .n(8)
1562 .k(k)
1563 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001564 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001565 }
1566 }
1567
Marat Dukhande06f492020-04-09 00:19:31 -07001568 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001569 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1570 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001571 for (uint32_t n = 1; n <= 8; n++) {
1572 for (uint32_t m = 1; m <= 4; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001573 GemmMicrokernelTester()
1574 .mr(4)
1575 .nr(8)
1576 .kr(1)
1577 .sr(1)
1578 .m(m)
1579 .n(n)
1580 .k(k)
1581 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001582 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001583 }
1584 }
1585 }
1586 }
1587
Marat Dukhande06f492020-04-09 00:19:31 -07001588 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001589 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1590 for (size_t k = 8; k <= 40; k += 4) {
1591 GemmMicrokernelTester()
1592 .mr(4)
1593 .nr(8)
1594 .kr(1)
1595 .sr(1)
1596 .m(4)
1597 .n(8)
1598 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001599 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001600 }
1601 }
1602
Marat Dukhande06f492020-04-09 00:19:31 -07001603 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_div_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001604 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1605 for (size_t k = 8; k <= 40; k += 4) {
1606 GemmMicrokernelTester()
1607 .mr(4)
1608 .nr(8)
1609 .kr(1)
1610 .sr(1)
1611 .m(4)
1612 .n(8)
1613 .k(k)
1614 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001615 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001616 }
1617 }
1618
Marat Dukhande06f492020-04-09 00:19:31 -07001619 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001620 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1621 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001622 for (uint32_t n = 1; n <= 8; n++) {
1623 for (uint32_t m = 1; m <= 4; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001624 GemmMicrokernelTester()
1625 .mr(4)
1626 .nr(8)
1627 .kr(1)
1628 .sr(1)
1629 .m(m)
1630 .n(n)
1631 .k(k)
1632 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001633 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001634 }
1635 }
1636 }
1637 }
1638
Marat Dukhande06f492020-04-09 00:19:31 -07001639 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001640 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1641 for (uint32_t n = 9; n < 16; n++) {
1642 for (size_t k = 1; k <= 20; k += 5) {
1643 GemmMicrokernelTester()
1644 .mr(4)
1645 .nr(8)
1646 .kr(1)
1647 .sr(1)
1648 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001649 .n(n)
XNNPACK Teamb455b122019-09-27 18:10:33 -07001650 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001651 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001652 }
1653 }
1654 }
1655
Marat Dukhande06f492020-04-09 00:19:31 -07001656 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001657 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1658 for (uint32_t n = 9; n < 16; n++) {
1659 for (size_t k = 1; k <= 20; k += 5) {
1660 GemmMicrokernelTester()
1661 .mr(4)
1662 .nr(8)
1663 .kr(1)
1664 .sr(1)
1665 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001666 .n(n)
XNNPACK Teamb455b122019-09-27 18:10:33 -07001667 .k(k)
1668 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001669 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001670 }
1671 }
1672 }
1673
Marat Dukhande06f492020-04-09 00:19:31 -07001674 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001675 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1676 for (uint32_t n = 9; n < 16; n++) {
1677 for (size_t k = 1; k <= 20; k += 5) {
1678 GemmMicrokernelTester()
1679 .mr(4)
1680 .nr(8)
1681 .kr(1)
1682 .sr(1)
1683 .m(4)
1684 .n(n)
1685 .k(k)
1686 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001687 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001688 }
1689 }
1690 }
1691
Marat Dukhande06f492020-04-09 00:19:31 -07001692 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001693 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1694 for (uint32_t n = 9; n < 16; n++) {
1695 for (size_t k = 1; k <= 20; k += 5) {
1696 for (uint32_t m = 1; m <= 4; m++) {
1697 GemmMicrokernelTester()
1698 .mr(4)
1699 .nr(8)
1700 .kr(1)
1701 .sr(1)
1702 .m(m)
1703 .n(n)
1704 .k(k)
1705 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001706 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001707 }
1708 }
1709 }
1710 }
1711
Marat Dukhande06f492020-04-09 00:19:31 -07001712 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001713 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1714 for (uint32_t n = 16; n <= 24; n += 8) {
1715 for (size_t k = 1; k <= 20; k += 5) {
1716 GemmMicrokernelTester()
1717 .mr(4)
1718 .nr(8)
1719 .kr(1)
1720 .sr(1)
1721 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001722 .n(n)
XNNPACK Teamb455b122019-09-27 18:10:33 -07001723 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001724 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001725 }
1726 }
1727 }
1728
Marat Dukhande06f492020-04-09 00:19:31 -07001729 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001730 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1731 for (uint32_t n = 16; n <= 24; n += 8) {
1732 for (size_t k = 1; k <= 20; k += 5) {
1733 GemmMicrokernelTester()
1734 .mr(4)
1735 .nr(8)
1736 .kr(1)
1737 .sr(1)
1738 .m(4)
1739 .n(n)
1740 .k(k)
1741 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001742 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001743 }
1744 }
1745 }
1746
Marat Dukhande06f492020-04-09 00:19:31 -07001747 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001748 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1749 for (uint32_t n = 16; n <= 24; n += 8) {
1750 for (size_t k = 1; k <= 20; k += 5) {
1751 GemmMicrokernelTester()
1752 .mr(4)
1753 .nr(8)
1754 .kr(1)
1755 .sr(1)
1756 .m(4)
1757 .n(n)
1758 .k(k)
1759 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001760 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001761 }
1762 }
1763 }
1764
Marat Dukhande06f492020-04-09 00:19:31 -07001765 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001766 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1767 for (uint32_t n = 16; n <= 24; n += 8) {
1768 for (size_t k = 1; k <= 20; k += 5) {
1769 for (uint32_t m = 1; m <= 4; m++) {
1770 GemmMicrokernelTester()
1771 .mr(4)
1772 .nr(8)
1773 .kr(1)
1774 .sr(1)
1775 .m(m)
1776 .n(n)
1777 .k(k)
1778 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001779 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001780 }
1781 }
1782 }
1783 }
1784
Marat Dukhande06f492020-04-09 00:19:31 -07001785 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001786 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1787 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001788 for (uint32_t n = 1; n <= 8; n++) {
1789 for (uint32_t m = 1; m <= 4; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001790 GemmMicrokernelTester()
1791 .mr(4)
1792 .nr(8)
1793 .kr(1)
1794 .sr(1)
1795 .m(m)
1796 .n(n)
1797 .k(k)
1798 .cm_stride(11)
1799 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001800 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001801 }
1802 }
1803 }
1804 }
1805
Marat Dukhande06f492020-04-09 00:19:31 -07001806 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001807 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1808 GemmMicrokernelTester()
1809 .mr(4)
1810 .nr(8)
1811 .kr(1)
1812 .sr(1)
1813 .m(4)
1814 .n(8)
1815 .k(4)
1816 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001817 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001818 }
1819
Marat Dukhande06f492020-04-09 00:19:31 -07001820 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001821 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1822 GemmMicrokernelTester()
1823 .mr(4)
1824 .nr(8)
1825 .kr(1)
1826 .sr(1)
1827 .m(4)
1828 .n(8)
1829 .k(4)
1830 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001831 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001832 }
1833
Marat Dukhande06f492020-04-09 00:19:31 -07001834 TEST(F16_GEMM_MINMAX_4X8__NEONFP16ARITH_LD64, strided_cm) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001835 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1836 GemmMicrokernelTester()
1837 .mr(4)
1838 .nr(8)
1839 .kr(1)
1840 .sr(1)
1841 .m(4)
1842 .n(8)
1843 .k(4)
1844 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001845 .Test(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001846 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001847#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07001848
1849
Marat Dukhan1dadbf72019-10-01 10:46:20 -07001850#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07001851 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001852 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1853 GemmMicrokernelTester()
1854 .mr(6)
1855 .nr(8)
1856 .kr(1)
1857 .sr(1)
1858 .m(6)
1859 .n(8)
1860 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001861 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001862 }
1863
Marat Dukhande06f492020-04-09 00:19:31 -07001864 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001865 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1866 GemmMicrokernelTester()
1867 .mr(6)
1868 .nr(8)
1869 .kr(1)
1870 .sr(1)
1871 .m(6)
1872 .n(8)
1873 .k(4)
1874 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001875 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001876 }
1877
Marat Dukhande06f492020-04-09 00:19:31 -07001878 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001879 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1880 GemmMicrokernelTester()
1881 .mr(6)
1882 .nr(8)
1883 .kr(1)
1884 .sr(1)
1885 .m(6)
1886 .n(8)
1887 .k(4)
1888 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001889 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001890 }
1891
Marat Dukhande06f492020-04-09 00:19:31 -07001892 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001893 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001894 for (uint32_t n = 1; n <= 8; n++) {
1895 for (uint32_t m = 1; m <= 6; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001896 GemmMicrokernelTester()
1897 .mr(6)
1898 .nr(8)
1899 .kr(1)
1900 .sr(1)
1901 .m(m)
1902 .n(n)
1903 .k(4)
1904 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001905 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001906 }
1907 }
1908 }
1909
Marat Dukhande06f492020-04-09 00:19:31 -07001910 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001911 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1912 for (uint32_t m = 1; m <= 6; m++) {
1913 GemmMicrokernelTester()
1914 .mr(6)
1915 .nr(8)
1916 .kr(1)
1917 .sr(1)
1918 .m(m)
1919 .n(8)
1920 .k(4)
1921 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001922 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001923 }
1924 }
1925
Marat Dukhande06f492020-04-09 00:19:31 -07001926 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001927 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1928 for (uint32_t n = 1; n <= 8; n++) {
1929 GemmMicrokernelTester()
1930 .mr(6)
1931 .nr(8)
1932 .kr(1)
1933 .sr(1)
1934 .m(6)
1935 .n(n)
1936 .k(4)
1937 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001938 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001939 }
1940 }
1941
Marat Dukhande06f492020-04-09 00:19:31 -07001942 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001943 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1944 for (size_t k = 1; k < 4; k++) {
1945 GemmMicrokernelTester()
1946 .mr(6)
1947 .nr(8)
1948 .kr(1)
1949 .sr(1)
1950 .m(6)
1951 .n(8)
1952 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001953 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001954 }
1955 }
1956
Marat Dukhande06f492020-04-09 00:19:31 -07001957 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001958 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1959 for (size_t k = 1; k < 4; k++) {
1960 GemmMicrokernelTester()
1961 .mr(6)
1962 .nr(8)
1963 .kr(1)
1964 .sr(1)
1965 .m(6)
1966 .n(8)
1967 .k(k)
1968 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001969 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001970 }
1971 }
1972
Marat Dukhande06f492020-04-09 00:19:31 -07001973 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001974 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1975 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001976 for (uint32_t n = 1; n <= 8; n++) {
1977 for (uint32_t m = 1; m <= 6; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001978 GemmMicrokernelTester()
1979 .mr(6)
1980 .nr(8)
1981 .kr(1)
1982 .sr(1)
1983 .m(m)
1984 .n(n)
1985 .k(k)
1986 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001987 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001988 }
1989 }
1990 }
1991 }
1992
Marat Dukhande06f492020-04-09 00:19:31 -07001993 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001994 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1995 for (size_t k = 5; k < 8; k++) {
1996 GemmMicrokernelTester()
1997 .mr(6)
1998 .nr(8)
1999 .kr(1)
2000 .sr(1)
2001 .m(6)
2002 .n(8)
2003 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002004 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002005 }
2006 }
2007
Marat Dukhande06f492020-04-09 00:19:31 -07002008 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002009 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2010 for (size_t k = 5; k < 8; k++) {
2011 GemmMicrokernelTester()
2012 .mr(6)
2013 .nr(8)
2014 .kr(1)
2015 .sr(1)
2016 .m(6)
2017 .n(8)
2018 .k(k)
2019 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002020 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002021 }
2022 }
2023
Marat Dukhande06f492020-04-09 00:19:31 -07002024 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002025 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2026 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002027 for (uint32_t n = 1; n <= 8; n++) {
2028 for (uint32_t m = 1; m <= 6; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002029 GemmMicrokernelTester()
2030 .mr(6)
2031 .nr(8)
2032 .kr(1)
2033 .sr(1)
2034 .m(m)
2035 .n(n)
2036 .k(k)
2037 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002038 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002039 }
2040 }
2041 }
2042 }
2043
Marat Dukhande06f492020-04-09 00:19:31 -07002044 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002045 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2046 for (size_t k = 8; k <= 40; k += 4) {
2047 GemmMicrokernelTester()
2048 .mr(6)
2049 .nr(8)
2050 .kr(1)
2051 .sr(1)
2052 .m(6)
2053 .n(8)
2054 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002055 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002056 }
2057 }
2058
Marat Dukhande06f492020-04-09 00:19:31 -07002059 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_div_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002060 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2061 for (size_t k = 8; k <= 40; k += 4) {
2062 GemmMicrokernelTester()
2063 .mr(6)
2064 .nr(8)
2065 .kr(1)
2066 .sr(1)
2067 .m(6)
2068 .n(8)
2069 .k(k)
2070 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002071 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002072 }
2073 }
2074
Marat Dukhande06f492020-04-09 00:19:31 -07002075 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002076 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2077 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002078 for (uint32_t n = 1; n <= 8; n++) {
2079 for (uint32_t m = 1; m <= 6; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002080 GemmMicrokernelTester()
2081 .mr(6)
2082 .nr(8)
2083 .kr(1)
2084 .sr(1)
2085 .m(m)
2086 .n(n)
2087 .k(k)
2088 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002089 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002090 }
2091 }
2092 }
2093 }
2094
Marat Dukhande06f492020-04-09 00:19:31 -07002095 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002096 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2097 for (uint32_t n = 9; n < 16; n++) {
2098 for (size_t k = 1; k <= 20; k += 5) {
2099 GemmMicrokernelTester()
2100 .mr(6)
2101 .nr(8)
2102 .kr(1)
2103 .sr(1)
2104 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002105 .n(n)
XNNPACK Teamb455b122019-09-27 18:10:33 -07002106 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002107 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002108 }
2109 }
2110 }
2111
Marat Dukhande06f492020-04-09 00:19:31 -07002112 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002113 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2114 for (uint32_t n = 9; n < 16; n++) {
2115 for (size_t k = 1; k <= 20; k += 5) {
2116 GemmMicrokernelTester()
2117 .mr(6)
2118 .nr(8)
2119 .kr(1)
2120 .sr(1)
2121 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002122 .n(n)
XNNPACK Teamb455b122019-09-27 18:10:33 -07002123 .k(k)
2124 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002125 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002126 }
2127 }
2128 }
2129
Marat Dukhande06f492020-04-09 00:19:31 -07002130 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002131 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2132 for (uint32_t n = 9; n < 16; n++) {
2133 for (size_t k = 1; k <= 20; k += 5) {
2134 GemmMicrokernelTester()
2135 .mr(6)
2136 .nr(8)
2137 .kr(1)
2138 .sr(1)
2139 .m(6)
2140 .n(n)
2141 .k(k)
2142 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002143 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002144 }
2145 }
2146 }
2147
Marat Dukhande06f492020-04-09 00:19:31 -07002148 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002149 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2150 for (uint32_t n = 9; n < 16; n++) {
2151 for (size_t k = 1; k <= 20; k += 5) {
2152 for (uint32_t m = 1; m <= 6; m++) {
2153 GemmMicrokernelTester()
2154 .mr(6)
2155 .nr(8)
2156 .kr(1)
2157 .sr(1)
2158 .m(m)
2159 .n(n)
2160 .k(k)
2161 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002162 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002163 }
2164 }
2165 }
2166 }
2167
Marat Dukhande06f492020-04-09 00:19:31 -07002168 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002169 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2170 for (uint32_t n = 16; n <= 24; n += 8) {
2171 for (size_t k = 1; k <= 20; k += 5) {
2172 GemmMicrokernelTester()
2173 .mr(6)
2174 .nr(8)
2175 .kr(1)
2176 .sr(1)
2177 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002178 .n(n)
XNNPACK Teamb455b122019-09-27 18:10:33 -07002179 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002180 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002181 }
2182 }
2183 }
2184
Marat Dukhande06f492020-04-09 00:19:31 -07002185 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002186 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2187 for (uint32_t n = 16; n <= 24; n += 8) {
2188 for (size_t k = 1; k <= 20; k += 5) {
2189 GemmMicrokernelTester()
2190 .mr(6)
2191 .nr(8)
2192 .kr(1)
2193 .sr(1)
2194 .m(6)
2195 .n(n)
2196 .k(k)
2197 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002198 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002199 }
2200 }
2201 }
2202
Marat Dukhande06f492020-04-09 00:19:31 -07002203 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002204 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2205 for (uint32_t n = 16; n <= 24; n += 8) {
2206 for (size_t k = 1; k <= 20; k += 5) {
2207 GemmMicrokernelTester()
2208 .mr(6)
2209 .nr(8)
2210 .kr(1)
2211 .sr(1)
2212 .m(6)
2213 .n(n)
2214 .k(k)
2215 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002216 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002217 }
2218 }
2219 }
2220
Marat Dukhande06f492020-04-09 00:19:31 -07002221 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002222 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2223 for (uint32_t n = 16; n <= 24; n += 8) {
2224 for (size_t k = 1; k <= 20; k += 5) {
2225 for (uint32_t m = 1; m <= 6; m++) {
2226 GemmMicrokernelTester()
2227 .mr(6)
2228 .nr(8)
2229 .kr(1)
2230 .sr(1)
2231 .m(m)
2232 .n(n)
2233 .k(k)
2234 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002235 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002236 }
2237 }
2238 }
2239 }
2240
Marat Dukhande06f492020-04-09 00:19:31 -07002241 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002242 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2243 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002244 for (uint32_t n = 1; n <= 8; n++) {
2245 for (uint32_t m = 1; m <= 6; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002246 GemmMicrokernelTester()
2247 .mr(6)
2248 .nr(8)
2249 .kr(1)
2250 .sr(1)
2251 .m(m)
2252 .n(n)
2253 .k(k)
2254 .cm_stride(11)
2255 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002256 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002257 }
2258 }
2259 }
2260 }
2261
Marat Dukhande06f492020-04-09 00:19:31 -07002262 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002263 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2264 GemmMicrokernelTester()
2265 .mr(6)
2266 .nr(8)
2267 .kr(1)
2268 .sr(1)
2269 .m(6)
2270 .n(8)
2271 .k(4)
2272 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002273 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002274 }
2275
Marat Dukhande06f492020-04-09 00:19:31 -07002276 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002277 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2278 GemmMicrokernelTester()
2279 .mr(6)
2280 .nr(8)
2281 .kr(1)
2282 .sr(1)
2283 .m(6)
2284 .n(8)
2285 .k(4)
2286 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002287 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002288 }
2289
Marat Dukhande06f492020-04-09 00:19:31 -07002290 TEST(F16_GEMM_MINMAX_6X8__NEONFP16ARITH_LD64, strided_cm) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002291 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2292 GemmMicrokernelTester()
2293 .mr(6)
2294 .nr(8)
2295 .kr(1)
2296 .sr(1)
2297 .m(6)
2298 .n(8)
2299 .k(4)
2300 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002301 .Test(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002302 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002303#endif // XNN_ARCH_ARM64
XNNPACK Teamb455b122019-09-27 18:10:33 -07002304
2305
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002306#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07002307 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002308 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2309 GemmMicrokernelTester()
2310 .mr(8)
2311 .nr(8)
2312 .kr(1)
2313 .sr(1)
2314 .m(8)
2315 .n(8)
2316 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002317 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002318 }
2319
Marat Dukhande06f492020-04-09 00:19:31 -07002320 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002321 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2322 GemmMicrokernelTester()
2323 .mr(8)
2324 .nr(8)
2325 .kr(1)
2326 .sr(1)
2327 .m(8)
2328 .n(8)
2329 .k(4)
2330 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002331 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002332 }
2333
Marat Dukhande06f492020-04-09 00:19:31 -07002334 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002335 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2336 GemmMicrokernelTester()
2337 .mr(8)
2338 .nr(8)
2339 .kr(1)
2340 .sr(1)
2341 .m(8)
2342 .n(8)
2343 .k(4)
2344 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002345 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002346 }
2347
Marat Dukhande06f492020-04-09 00:19:31 -07002348 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002349 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002350 for (uint32_t n = 1; n <= 8; n++) {
2351 for (uint32_t m = 1; m <= 8; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002352 GemmMicrokernelTester()
2353 .mr(8)
2354 .nr(8)
2355 .kr(1)
2356 .sr(1)
2357 .m(m)
2358 .n(n)
2359 .k(4)
2360 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002361 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002362 }
2363 }
2364 }
2365
Marat Dukhande06f492020-04-09 00:19:31 -07002366 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002367 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2368 for (uint32_t m = 1; m <= 8; m++) {
2369 GemmMicrokernelTester()
2370 .mr(8)
2371 .nr(8)
2372 .kr(1)
2373 .sr(1)
2374 .m(m)
2375 .n(8)
2376 .k(4)
2377 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002378 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002379 }
2380 }
2381
Marat Dukhande06f492020-04-09 00:19:31 -07002382 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002383 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2384 for (uint32_t n = 1; n <= 8; n++) {
2385 GemmMicrokernelTester()
2386 .mr(8)
2387 .nr(8)
2388 .kr(1)
2389 .sr(1)
2390 .m(8)
2391 .n(n)
2392 .k(4)
2393 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002394 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002395 }
2396 }
2397
Marat Dukhande06f492020-04-09 00:19:31 -07002398 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_lt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002399 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2400 for (size_t k = 1; k < 4; k++) {
2401 GemmMicrokernelTester()
2402 .mr(8)
2403 .nr(8)
2404 .kr(1)
2405 .sr(1)
2406 .m(8)
2407 .n(8)
2408 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002409 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002410 }
2411 }
2412
Marat Dukhande06f492020-04-09 00:19:31 -07002413 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002414 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2415 for (size_t k = 1; k < 4; k++) {
2416 GemmMicrokernelTester()
2417 .mr(8)
2418 .nr(8)
2419 .kr(1)
2420 .sr(1)
2421 .m(8)
2422 .n(8)
2423 .k(k)
2424 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002425 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002426 }
2427 }
2428
Marat Dukhande06f492020-04-09 00:19:31 -07002429 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002430 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2431 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002432 for (uint32_t n = 1; n <= 8; n++) {
2433 for (uint32_t m = 1; m <= 8; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002434 GemmMicrokernelTester()
2435 .mr(8)
2436 .nr(8)
2437 .kr(1)
2438 .sr(1)
2439 .m(m)
2440 .n(n)
2441 .k(k)
2442 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002443 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002444 }
2445 }
2446 }
2447 }
2448
Marat Dukhande06f492020-04-09 00:19:31 -07002449 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_gt_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002450 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2451 for (size_t k = 5; k < 8; k++) {
2452 GemmMicrokernelTester()
2453 .mr(8)
2454 .nr(8)
2455 .kr(1)
2456 .sr(1)
2457 .m(8)
2458 .n(8)
2459 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002460 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002461 }
2462 }
2463
Marat Dukhande06f492020-04-09 00:19:31 -07002464 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002465 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2466 for (size_t k = 5; k < 8; k++) {
2467 GemmMicrokernelTester()
2468 .mr(8)
2469 .nr(8)
2470 .kr(1)
2471 .sr(1)
2472 .m(8)
2473 .n(8)
2474 .k(k)
2475 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002476 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002477 }
2478 }
2479
Marat Dukhande06f492020-04-09 00:19:31 -07002480 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002481 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2482 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002483 for (uint32_t n = 1; n <= 8; n++) {
2484 for (uint32_t m = 1; m <= 8; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002485 GemmMicrokernelTester()
2486 .mr(8)
2487 .nr(8)
2488 .kr(1)
2489 .sr(1)
2490 .m(m)
2491 .n(n)
2492 .k(k)
2493 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002494 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002495 }
2496 }
2497 }
2498 }
2499
Marat Dukhande06f492020-04-09 00:19:31 -07002500 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_div_4) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002501 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2502 for (size_t k = 8; k <= 40; k += 4) {
2503 GemmMicrokernelTester()
2504 .mr(8)
2505 .nr(8)
2506 .kr(1)
2507 .sr(1)
2508 .m(8)
2509 .n(8)
2510 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002511 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002512 }
2513 }
2514
Marat Dukhande06f492020-04-09 00:19:31 -07002515 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_div_4_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002516 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2517 for (size_t k = 8; k <= 40; k += 4) {
2518 GemmMicrokernelTester()
2519 .mr(8)
2520 .nr(8)
2521 .kr(1)
2522 .sr(1)
2523 .m(8)
2524 .n(8)
2525 .k(k)
2526 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002527 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002528 }
2529 }
2530
Marat Dukhande06f492020-04-09 00:19:31 -07002531 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002532 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2533 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002534 for (uint32_t n = 1; n <= 8; n++) {
2535 for (uint32_t m = 1; m <= 8; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002536 GemmMicrokernelTester()
2537 .mr(8)
2538 .nr(8)
2539 .kr(1)
2540 .sr(1)
2541 .m(m)
2542 .n(n)
2543 .k(k)
2544 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002545 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002546 }
2547 }
2548 }
2549 }
2550
Marat Dukhande06f492020-04-09 00:19:31 -07002551 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002552 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2553 for (uint32_t n = 9; n < 16; n++) {
2554 for (size_t k = 1; k <= 20; k += 5) {
2555 GemmMicrokernelTester()
2556 .mr(8)
2557 .nr(8)
2558 .kr(1)
2559 .sr(1)
2560 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002561 .n(n)
XNNPACK Teamb455b122019-09-27 18:10:33 -07002562 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002563 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002564 }
2565 }
2566 }
2567
Marat Dukhande06f492020-04-09 00:19:31 -07002568 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002569 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2570 for (uint32_t n = 9; n < 16; n++) {
2571 for (size_t k = 1; k <= 20; k += 5) {
2572 GemmMicrokernelTester()
2573 .mr(8)
2574 .nr(8)
2575 .kr(1)
2576 .sr(1)
2577 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002578 .n(n)
XNNPACK Teamb455b122019-09-27 18:10:33 -07002579 .k(k)
2580 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002581 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002582 }
2583 }
2584 }
2585
Marat Dukhande06f492020-04-09 00:19:31 -07002586 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002587 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2588 for (uint32_t n = 9; n < 16; n++) {
2589 for (size_t k = 1; k <= 20; k += 5) {
2590 GemmMicrokernelTester()
2591 .mr(8)
2592 .nr(8)
2593 .kr(1)
2594 .sr(1)
2595 .m(8)
2596 .n(n)
2597 .k(k)
2598 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002599 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002600 }
2601 }
2602 }
2603
Marat Dukhande06f492020-04-09 00:19:31 -07002604 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002605 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2606 for (uint32_t n = 9; n < 16; n++) {
2607 for (size_t k = 1; k <= 20; k += 5) {
2608 for (uint32_t m = 1; m <= 8; m++) {
2609 GemmMicrokernelTester()
2610 .mr(8)
2611 .nr(8)
2612 .kr(1)
2613 .sr(1)
2614 .m(m)
2615 .n(n)
2616 .k(k)
2617 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002618 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002619 }
2620 }
2621 }
2622 }
2623
Marat Dukhande06f492020-04-09 00:19:31 -07002624 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002625 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2626 for (uint32_t n = 16; n <= 24; n += 8) {
2627 for (size_t k = 1; k <= 20; k += 5) {
2628 GemmMicrokernelTester()
2629 .mr(8)
2630 .nr(8)
2631 .kr(1)
2632 .sr(1)
2633 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002634 .n(n)
XNNPACK Teamb455b122019-09-27 18:10:33 -07002635 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002636 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002637 }
2638 }
2639 }
2640
Marat Dukhande06f492020-04-09 00:19:31 -07002641 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002642 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2643 for (uint32_t n = 16; n <= 24; n += 8) {
2644 for (size_t k = 1; k <= 20; k += 5) {
2645 GemmMicrokernelTester()
2646 .mr(8)
2647 .nr(8)
2648 .kr(1)
2649 .sr(1)
2650 .m(8)
2651 .n(n)
2652 .k(k)
2653 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002654 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002655 }
2656 }
2657 }
2658
Marat Dukhande06f492020-04-09 00:19:31 -07002659 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8_strided_a) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002660 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2661 for (uint32_t n = 16; n <= 24; n += 8) {
2662 for (size_t k = 1; k <= 20; k += 5) {
2663 GemmMicrokernelTester()
2664 .mr(8)
2665 .nr(8)
2666 .kr(1)
2667 .sr(1)
2668 .m(8)
2669 .n(n)
2670 .k(k)
2671 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002672 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002673 }
2674 }
2675 }
2676
Marat Dukhande06f492020-04-09 00:19:31 -07002677 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002678 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2679 for (uint32_t n = 16; n <= 24; n += 8) {
2680 for (size_t k = 1; k <= 20; k += 5) {
2681 for (uint32_t m = 1; m <= 8; m++) {
2682 GemmMicrokernelTester()
2683 .mr(8)
2684 .nr(8)
2685 .kr(1)
2686 .sr(1)
2687 .m(m)
2688 .n(n)
2689 .k(k)
2690 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002691 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002692 }
2693 }
2694 }
2695 }
2696
Marat Dukhande06f492020-04-09 00:19:31 -07002697 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002698 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2699 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002700 for (uint32_t n = 1; n <= 8; n++) {
2701 for (uint32_t m = 1; m <= 8; m++) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002702 GemmMicrokernelTester()
2703 .mr(8)
2704 .nr(8)
2705 .kr(1)
2706 .sr(1)
2707 .m(m)
2708 .n(n)
2709 .k(k)
2710 .cm_stride(11)
2711 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002712 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002713 }
2714 }
2715 }
2716 }
2717
Marat Dukhande06f492020-04-09 00:19:31 -07002718 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, qmin) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002719 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2720 GemmMicrokernelTester()
2721 .mr(8)
2722 .nr(8)
2723 .kr(1)
2724 .sr(1)
2725 .m(8)
2726 .n(8)
2727 .k(4)
2728 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002729 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002730 }
2731
Marat Dukhande06f492020-04-09 00:19:31 -07002732 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, qmax) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002733 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2734 GemmMicrokernelTester()
2735 .mr(8)
2736 .nr(8)
2737 .kr(1)
2738 .sr(1)
2739 .m(8)
2740 .n(8)
2741 .k(4)
2742 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002743 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002744 }
2745
Marat Dukhande06f492020-04-09 00:19:31 -07002746 TEST(F16_GEMM_MINMAX_8X8__NEONFP16ARITH_LD64, strided_cm) {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002747 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2748 GemmMicrokernelTester()
2749 .mr(8)
2750 .nr(8)
2751 .kr(1)
2752 .sr(1)
2753 .m(8)
2754 .n(8)
2755 .k(4)
2756 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002757 .Test(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002758 }
Marat Dukhan1dadbf72019-10-01 10:46:20 -07002759#endif // XNN_ARCH_ARM64
Frank Barchard683f5592020-04-10 00:48:26 -07002760
2761
2762#if XNN_ARCH_ARM64
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002763 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_eq_4) {
2764 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2765 GemmMicrokernelTester()
2766 .mr(1)
2767 .nr(16)
2768 .kr(1)
2769 .sr(1)
2770 .m(1)
2771 .n(16)
2772 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002773 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002774 }
2775
2776 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, strided_cn) {
2777 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2778 GemmMicrokernelTester()
2779 .mr(1)
2780 .nr(16)
2781 .kr(1)
2782 .sr(1)
2783 .m(1)
2784 .n(16)
2785 .k(4)
2786 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002787 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002788 }
2789
2790 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
2791 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2792 GemmMicrokernelTester()
2793 .mr(1)
2794 .nr(16)
2795 .kr(1)
2796 .sr(1)
2797 .m(1)
2798 .n(16)
2799 .k(4)
2800 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002801 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002802 }
2803
2804 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_eq_4_subtile) {
2805 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002806 for (uint32_t n = 1; n <= 16; n++) {
2807 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002808 GemmMicrokernelTester()
2809 .mr(1)
2810 .nr(16)
2811 .kr(1)
2812 .sr(1)
2813 .m(m)
2814 .n(n)
2815 .k(4)
2816 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002817 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002818 }
2819 }
2820 }
2821
2822 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
2823 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2824 for (uint32_t m = 1; m <= 1; m++) {
2825 GemmMicrokernelTester()
2826 .mr(1)
2827 .nr(16)
2828 .kr(1)
2829 .sr(1)
2830 .m(m)
2831 .n(16)
2832 .k(4)
2833 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002834 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002835 }
2836 }
2837
2838 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
2839 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2840 for (uint32_t n = 1; n <= 16; n++) {
2841 GemmMicrokernelTester()
2842 .mr(1)
2843 .nr(16)
2844 .kr(1)
2845 .sr(1)
2846 .m(1)
2847 .n(n)
2848 .k(4)
2849 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002850 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002851 }
2852 }
2853
2854 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_lt_4) {
2855 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2856 for (size_t k = 1; k < 4; k++) {
2857 GemmMicrokernelTester()
2858 .mr(1)
2859 .nr(16)
2860 .kr(1)
2861 .sr(1)
2862 .m(1)
2863 .n(16)
2864 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002865 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002866 }
2867 }
2868
2869 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
2870 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2871 for (size_t k = 1; k < 4; k++) {
2872 GemmMicrokernelTester()
2873 .mr(1)
2874 .nr(16)
2875 .kr(1)
2876 .sr(1)
2877 .m(1)
2878 .n(16)
2879 .k(k)
2880 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002881 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002882 }
2883 }
2884
2885 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_lt_4_subtile) {
2886 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2887 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002888 for (uint32_t n = 1; n <= 16; n++) {
2889 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002890 GemmMicrokernelTester()
2891 .mr(1)
2892 .nr(16)
2893 .kr(1)
2894 .sr(1)
2895 .m(m)
2896 .n(n)
2897 .k(k)
2898 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002899 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002900 }
2901 }
2902 }
2903 }
2904
2905 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_gt_4) {
2906 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2907 for (size_t k = 5; k < 8; k++) {
2908 GemmMicrokernelTester()
2909 .mr(1)
2910 .nr(16)
2911 .kr(1)
2912 .sr(1)
2913 .m(1)
2914 .n(16)
2915 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002916 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002917 }
2918 }
2919
2920 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
2921 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2922 for (size_t k = 5; k < 8; k++) {
2923 GemmMicrokernelTester()
2924 .mr(1)
2925 .nr(16)
2926 .kr(1)
2927 .sr(1)
2928 .m(1)
2929 .n(16)
2930 .k(k)
2931 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002932 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002933 }
2934 }
2935
2936 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_gt_4_subtile) {
2937 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2938 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002939 for (uint32_t n = 1; n <= 16; n++) {
2940 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002941 GemmMicrokernelTester()
2942 .mr(1)
2943 .nr(16)
2944 .kr(1)
2945 .sr(1)
2946 .m(m)
2947 .n(n)
2948 .k(k)
2949 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002950 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002951 }
2952 }
2953 }
2954 }
2955
2956 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_div_4) {
2957 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2958 for (size_t k = 8; k <= 40; k += 4) {
2959 GemmMicrokernelTester()
2960 .mr(1)
2961 .nr(16)
2962 .kr(1)
2963 .sr(1)
2964 .m(1)
2965 .n(16)
2966 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002967 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002968 }
2969 }
2970
2971 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_div_4_strided_a) {
2972 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2973 for (size_t k = 8; k <= 40; k += 4) {
2974 GemmMicrokernelTester()
2975 .mr(1)
2976 .nr(16)
2977 .kr(1)
2978 .sr(1)
2979 .m(1)
2980 .n(16)
2981 .k(k)
2982 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002983 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002984 }
2985 }
2986
2987 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_div_4_subtile) {
2988 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2989 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002990 for (uint32_t n = 1; n <= 16; n++) {
2991 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002992 GemmMicrokernelTester()
2993 .mr(1)
2994 .nr(16)
2995 .kr(1)
2996 .sr(1)
2997 .m(m)
2998 .n(n)
2999 .k(k)
3000 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003001 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003002 }
3003 }
3004 }
3005 }
3006
3007 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_gt_16) {
3008 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3009 for (uint32_t n = 17; n < 32; n++) {
3010 for (size_t k = 1; k <= 20; k += 5) {
3011 GemmMicrokernelTester()
3012 .mr(1)
3013 .nr(16)
3014 .kr(1)
3015 .sr(1)
3016 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003017 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003018 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003019 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003020 }
3021 }
3022 }
3023
3024 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_gt_16_strided_cn) {
3025 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3026 for (uint32_t n = 17; n < 32; n++) {
3027 for (size_t k = 1; k <= 20; k += 5) {
3028 GemmMicrokernelTester()
3029 .mr(1)
3030 .nr(16)
3031 .kr(1)
3032 .sr(1)
3033 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003034 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003035 .k(k)
3036 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003037 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003038 }
3039 }
3040 }
3041
3042 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_gt_16_strided_a) {
3043 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3044 for (uint32_t n = 17; n < 32; n++) {
3045 for (size_t k = 1; k <= 20; k += 5) {
3046 GemmMicrokernelTester()
3047 .mr(1)
3048 .nr(16)
3049 .kr(1)
3050 .sr(1)
3051 .m(1)
3052 .n(n)
3053 .k(k)
3054 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003055 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003056 }
3057 }
3058 }
3059
3060 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_gt_16_subtile) {
3061 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3062 for (uint32_t n = 17; n < 32; n++) {
3063 for (size_t k = 1; k <= 20; k += 5) {
3064 for (uint32_t m = 1; m <= 1; m++) {
3065 GemmMicrokernelTester()
3066 .mr(1)
3067 .nr(16)
3068 .kr(1)
3069 .sr(1)
3070 .m(m)
3071 .n(n)
3072 .k(k)
3073 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003074 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003075 }
3076 }
3077 }
3078 }
3079
3080 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_div_16) {
3081 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3082 for (uint32_t n = 32; n <= 48; n += 16) {
3083 for (size_t k = 1; k <= 20; k += 5) {
3084 GemmMicrokernelTester()
3085 .mr(1)
3086 .nr(16)
3087 .kr(1)
3088 .sr(1)
3089 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003090 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003091 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003092 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003093 }
3094 }
3095 }
3096
3097 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_div_16_strided_cn) {
3098 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3099 for (uint32_t n = 32; n <= 48; n += 16) {
3100 for (size_t k = 1; k <= 20; k += 5) {
3101 GemmMicrokernelTester()
3102 .mr(1)
3103 .nr(16)
3104 .kr(1)
3105 .sr(1)
3106 .m(1)
3107 .n(n)
3108 .k(k)
3109 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003110 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003111 }
3112 }
3113 }
3114
3115 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_div_16_strided_a) {
3116 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3117 for (uint32_t n = 32; n <= 48; n += 16) {
3118 for (size_t k = 1; k <= 20; k += 5) {
3119 GemmMicrokernelTester()
3120 .mr(1)
3121 .nr(16)
3122 .kr(1)
3123 .sr(1)
3124 .m(1)
3125 .n(n)
3126 .k(k)
3127 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003128 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003129 }
3130 }
3131 }
3132
3133 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_div_16_subtile) {
3134 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3135 for (uint32_t n = 32; n <= 48; n += 16) {
3136 for (size_t k = 1; k <= 20; k += 5) {
3137 for (uint32_t m = 1; m <= 1; m++) {
3138 GemmMicrokernelTester()
3139 .mr(1)
3140 .nr(16)
3141 .kr(1)
3142 .sr(1)
3143 .m(m)
3144 .n(n)
3145 .k(k)
3146 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003147 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003148 }
3149 }
3150 }
3151 }
3152
3153 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, strided_cm_subtile) {
3154 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3155 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003156 for (uint32_t n = 1; n <= 16; n++) {
3157 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003158 GemmMicrokernelTester()
3159 .mr(1)
3160 .nr(16)
3161 .kr(1)
3162 .sr(1)
3163 .m(m)
3164 .n(n)
3165 .k(k)
3166 .cm_stride(19)
3167 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003168 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003169 }
3170 }
3171 }
3172 }
3173
3174 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, qmin) {
3175 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3176 GemmMicrokernelTester()
3177 .mr(1)
3178 .nr(16)
3179 .kr(1)
3180 .sr(1)
3181 .m(1)
3182 .n(16)
3183 .k(4)
3184 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003185 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003186 }
3187
3188 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, qmax) {
3189 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3190 GemmMicrokernelTester()
3191 .mr(1)
3192 .nr(16)
3193 .kr(1)
3194 .sr(1)
3195 .m(1)
3196 .n(16)
3197 .k(4)
3198 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003199 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003200 }
3201
3202 TEST(F16_GEMM_MINMAX_1X16__NEONFP16ARITH_LD64, strided_cm) {
3203 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3204 GemmMicrokernelTester()
3205 .mr(1)
3206 .nr(16)
3207 .kr(1)
3208 .sr(1)
3209 .m(1)
3210 .n(16)
3211 .k(4)
3212 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003213 .Test(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003214 }
3215#endif // XNN_ARCH_ARM64
3216
3217
3218#if XNN_ARCH_ARM64
3219 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_eq_4) {
3220 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3221 GemmMicrokernelTester()
3222 .mr(4)
3223 .nr(16)
3224 .kr(1)
3225 .sr(1)
3226 .m(4)
3227 .n(16)
3228 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003229 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003230 }
3231
3232 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, strided_cn) {
3233 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3234 GemmMicrokernelTester()
3235 .mr(4)
3236 .nr(16)
3237 .kr(1)
3238 .sr(1)
3239 .m(4)
3240 .n(16)
3241 .k(4)
3242 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003243 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003244 }
3245
3246 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
3247 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3248 GemmMicrokernelTester()
3249 .mr(4)
3250 .nr(16)
3251 .kr(1)
3252 .sr(1)
3253 .m(4)
3254 .n(16)
3255 .k(4)
3256 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003257 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003258 }
3259
3260 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_eq_4_subtile) {
3261 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003262 for (uint32_t n = 1; n <= 16; n++) {
3263 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003264 GemmMicrokernelTester()
3265 .mr(4)
3266 .nr(16)
3267 .kr(1)
3268 .sr(1)
3269 .m(m)
3270 .n(n)
3271 .k(4)
3272 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003273 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003274 }
3275 }
3276 }
3277
3278 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
3279 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3280 for (uint32_t m = 1; m <= 4; m++) {
3281 GemmMicrokernelTester()
3282 .mr(4)
3283 .nr(16)
3284 .kr(1)
3285 .sr(1)
3286 .m(m)
3287 .n(16)
3288 .k(4)
3289 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003290 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003291 }
3292 }
3293
3294 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
3295 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3296 for (uint32_t n = 1; n <= 16; n++) {
3297 GemmMicrokernelTester()
3298 .mr(4)
3299 .nr(16)
3300 .kr(1)
3301 .sr(1)
3302 .m(4)
3303 .n(n)
3304 .k(4)
3305 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003306 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003307 }
3308 }
3309
3310 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_lt_4) {
3311 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3312 for (size_t k = 1; k < 4; k++) {
3313 GemmMicrokernelTester()
3314 .mr(4)
3315 .nr(16)
3316 .kr(1)
3317 .sr(1)
3318 .m(4)
3319 .n(16)
3320 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003321 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003322 }
3323 }
3324
3325 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
3326 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3327 for (size_t k = 1; k < 4; k++) {
3328 GemmMicrokernelTester()
3329 .mr(4)
3330 .nr(16)
3331 .kr(1)
3332 .sr(1)
3333 .m(4)
3334 .n(16)
3335 .k(k)
3336 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003337 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003338 }
3339 }
3340
3341 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_lt_4_subtile) {
3342 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3343 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003344 for (uint32_t n = 1; n <= 16; n++) {
3345 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003346 GemmMicrokernelTester()
3347 .mr(4)
3348 .nr(16)
3349 .kr(1)
3350 .sr(1)
3351 .m(m)
3352 .n(n)
3353 .k(k)
3354 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003355 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003356 }
3357 }
3358 }
3359 }
3360
3361 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_gt_4) {
3362 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3363 for (size_t k = 5; k < 8; k++) {
3364 GemmMicrokernelTester()
3365 .mr(4)
3366 .nr(16)
3367 .kr(1)
3368 .sr(1)
3369 .m(4)
3370 .n(16)
3371 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003372 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003373 }
3374 }
3375
3376 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
3377 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3378 for (size_t k = 5; k < 8; k++) {
3379 GemmMicrokernelTester()
3380 .mr(4)
3381 .nr(16)
3382 .kr(1)
3383 .sr(1)
3384 .m(4)
3385 .n(16)
3386 .k(k)
3387 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003388 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003389 }
3390 }
3391
3392 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_gt_4_subtile) {
3393 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3394 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003395 for (uint32_t n = 1; n <= 16; n++) {
3396 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003397 GemmMicrokernelTester()
3398 .mr(4)
3399 .nr(16)
3400 .kr(1)
3401 .sr(1)
3402 .m(m)
3403 .n(n)
3404 .k(k)
3405 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003406 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003407 }
3408 }
3409 }
3410 }
3411
3412 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_div_4) {
3413 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3414 for (size_t k = 8; k <= 40; k += 4) {
3415 GemmMicrokernelTester()
3416 .mr(4)
3417 .nr(16)
3418 .kr(1)
3419 .sr(1)
3420 .m(4)
3421 .n(16)
3422 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003423 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003424 }
3425 }
3426
3427 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_div_4_strided_a) {
3428 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3429 for (size_t k = 8; k <= 40; k += 4) {
3430 GemmMicrokernelTester()
3431 .mr(4)
3432 .nr(16)
3433 .kr(1)
3434 .sr(1)
3435 .m(4)
3436 .n(16)
3437 .k(k)
3438 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003439 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003440 }
3441 }
3442
3443 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_div_4_subtile) {
3444 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3445 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003446 for (uint32_t n = 1; n <= 16; n++) {
3447 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003448 GemmMicrokernelTester()
3449 .mr(4)
3450 .nr(16)
3451 .kr(1)
3452 .sr(1)
3453 .m(m)
3454 .n(n)
3455 .k(k)
3456 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003457 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003458 }
3459 }
3460 }
3461 }
3462
3463 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_gt_16) {
3464 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3465 for (uint32_t n = 17; n < 32; n++) {
3466 for (size_t k = 1; k <= 20; k += 5) {
3467 GemmMicrokernelTester()
3468 .mr(4)
3469 .nr(16)
3470 .kr(1)
3471 .sr(1)
3472 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003473 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003474 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003475 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003476 }
3477 }
3478 }
3479
3480 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_gt_16_strided_cn) {
3481 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3482 for (uint32_t n = 17; n < 32; n++) {
3483 for (size_t k = 1; k <= 20; k += 5) {
3484 GemmMicrokernelTester()
3485 .mr(4)
3486 .nr(16)
3487 .kr(1)
3488 .sr(1)
3489 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003490 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003491 .k(k)
3492 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003493 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003494 }
3495 }
3496 }
3497
3498 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_gt_16_strided_a) {
3499 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3500 for (uint32_t n = 17; n < 32; n++) {
3501 for (size_t k = 1; k <= 20; k += 5) {
3502 GemmMicrokernelTester()
3503 .mr(4)
3504 .nr(16)
3505 .kr(1)
3506 .sr(1)
3507 .m(4)
3508 .n(n)
3509 .k(k)
3510 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003511 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003512 }
3513 }
3514 }
3515
3516 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_gt_16_subtile) {
3517 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3518 for (uint32_t n = 17; n < 32; n++) {
3519 for (size_t k = 1; k <= 20; k += 5) {
3520 for (uint32_t m = 1; m <= 4; m++) {
3521 GemmMicrokernelTester()
3522 .mr(4)
3523 .nr(16)
3524 .kr(1)
3525 .sr(1)
3526 .m(m)
3527 .n(n)
3528 .k(k)
3529 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003530 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003531 }
3532 }
3533 }
3534 }
3535
3536 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_div_16) {
3537 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3538 for (uint32_t n = 32; n <= 48; n += 16) {
3539 for (size_t k = 1; k <= 20; k += 5) {
3540 GemmMicrokernelTester()
3541 .mr(4)
3542 .nr(16)
3543 .kr(1)
3544 .sr(1)
3545 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003546 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003547 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003548 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003549 }
3550 }
3551 }
3552
3553 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_div_16_strided_cn) {
3554 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3555 for (uint32_t n = 32; n <= 48; n += 16) {
3556 for (size_t k = 1; k <= 20; k += 5) {
3557 GemmMicrokernelTester()
3558 .mr(4)
3559 .nr(16)
3560 .kr(1)
3561 .sr(1)
3562 .m(4)
3563 .n(n)
3564 .k(k)
3565 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003566 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003567 }
3568 }
3569 }
3570
3571 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_div_16_strided_a) {
3572 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3573 for (uint32_t n = 32; n <= 48; n += 16) {
3574 for (size_t k = 1; k <= 20; k += 5) {
3575 GemmMicrokernelTester()
3576 .mr(4)
3577 .nr(16)
3578 .kr(1)
3579 .sr(1)
3580 .m(4)
3581 .n(n)
3582 .k(k)
3583 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003584 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003585 }
3586 }
3587 }
3588
3589 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_div_16_subtile) {
3590 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3591 for (uint32_t n = 32; n <= 48; n += 16) {
3592 for (size_t k = 1; k <= 20; k += 5) {
3593 for (uint32_t m = 1; m <= 4; m++) {
3594 GemmMicrokernelTester()
3595 .mr(4)
3596 .nr(16)
3597 .kr(1)
3598 .sr(1)
3599 .m(m)
3600 .n(n)
3601 .k(k)
3602 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003603 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003604 }
3605 }
3606 }
3607 }
3608
3609 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, strided_cm_subtile) {
3610 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3611 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003612 for (uint32_t n = 1; n <= 16; n++) {
3613 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003614 GemmMicrokernelTester()
3615 .mr(4)
3616 .nr(16)
3617 .kr(1)
3618 .sr(1)
3619 .m(m)
3620 .n(n)
3621 .k(k)
3622 .cm_stride(19)
3623 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003624 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003625 }
3626 }
3627 }
3628 }
3629
3630 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, qmin) {
3631 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3632 GemmMicrokernelTester()
3633 .mr(4)
3634 .nr(16)
3635 .kr(1)
3636 .sr(1)
3637 .m(4)
3638 .n(16)
3639 .k(4)
3640 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003641 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003642 }
3643
3644 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, qmax) {
3645 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3646 GemmMicrokernelTester()
3647 .mr(4)
3648 .nr(16)
3649 .kr(1)
3650 .sr(1)
3651 .m(4)
3652 .n(16)
3653 .k(4)
3654 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003655 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003656 }
3657
3658 TEST(F16_GEMM_MINMAX_4X16__NEONFP16ARITH_LD64, strided_cm) {
3659 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3660 GemmMicrokernelTester()
3661 .mr(4)
3662 .nr(16)
3663 .kr(1)
3664 .sr(1)
3665 .m(4)
3666 .n(16)
3667 .k(4)
3668 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003669 .Test(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003670 }
3671#endif // XNN_ARCH_ARM64
3672
3673
3674#if XNN_ARCH_ARM64
3675 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_eq_4) {
3676 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3677 GemmMicrokernelTester()
3678 .mr(6)
3679 .nr(16)
3680 .kr(1)
3681 .sr(1)
3682 .m(6)
3683 .n(16)
3684 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003685 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003686 }
3687
3688 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, strided_cn) {
3689 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3690 GemmMicrokernelTester()
3691 .mr(6)
3692 .nr(16)
3693 .kr(1)
3694 .sr(1)
3695 .m(6)
3696 .n(16)
3697 .k(4)
3698 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003699 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003700 }
3701
3702 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
3703 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3704 GemmMicrokernelTester()
3705 .mr(6)
3706 .nr(16)
3707 .kr(1)
3708 .sr(1)
3709 .m(6)
3710 .n(16)
3711 .k(4)
3712 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003713 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003714 }
3715
3716 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_eq_4_subtile) {
3717 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003718 for (uint32_t n = 1; n <= 16; n++) {
3719 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003720 GemmMicrokernelTester()
3721 .mr(6)
3722 .nr(16)
3723 .kr(1)
3724 .sr(1)
3725 .m(m)
3726 .n(n)
3727 .k(4)
3728 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003729 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003730 }
3731 }
3732 }
3733
3734 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
3735 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3736 for (uint32_t m = 1; m <= 6; m++) {
3737 GemmMicrokernelTester()
3738 .mr(6)
3739 .nr(16)
3740 .kr(1)
3741 .sr(1)
3742 .m(m)
3743 .n(16)
3744 .k(4)
3745 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003746 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003747 }
3748 }
3749
3750 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
3751 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3752 for (uint32_t n = 1; n <= 16; n++) {
3753 GemmMicrokernelTester()
3754 .mr(6)
3755 .nr(16)
3756 .kr(1)
3757 .sr(1)
3758 .m(6)
3759 .n(n)
3760 .k(4)
3761 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003762 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003763 }
3764 }
3765
3766 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_lt_4) {
3767 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3768 for (size_t k = 1; k < 4; k++) {
3769 GemmMicrokernelTester()
3770 .mr(6)
3771 .nr(16)
3772 .kr(1)
3773 .sr(1)
3774 .m(6)
3775 .n(16)
3776 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003777 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003778 }
3779 }
3780
3781 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
3782 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3783 for (size_t k = 1; k < 4; k++) {
3784 GemmMicrokernelTester()
3785 .mr(6)
3786 .nr(16)
3787 .kr(1)
3788 .sr(1)
3789 .m(6)
3790 .n(16)
3791 .k(k)
3792 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003793 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003794 }
3795 }
3796
3797 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_lt_4_subtile) {
3798 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3799 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003800 for (uint32_t n = 1; n <= 16; n++) {
3801 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003802 GemmMicrokernelTester()
3803 .mr(6)
3804 .nr(16)
3805 .kr(1)
3806 .sr(1)
3807 .m(m)
3808 .n(n)
3809 .k(k)
3810 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003811 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003812 }
3813 }
3814 }
3815 }
3816
3817 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_gt_4) {
3818 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3819 for (size_t k = 5; k < 8; k++) {
3820 GemmMicrokernelTester()
3821 .mr(6)
3822 .nr(16)
3823 .kr(1)
3824 .sr(1)
3825 .m(6)
3826 .n(16)
3827 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003828 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003829 }
3830 }
3831
3832 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
3833 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3834 for (size_t k = 5; k < 8; k++) {
3835 GemmMicrokernelTester()
3836 .mr(6)
3837 .nr(16)
3838 .kr(1)
3839 .sr(1)
3840 .m(6)
3841 .n(16)
3842 .k(k)
3843 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003844 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003845 }
3846 }
3847
3848 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_gt_4_subtile) {
3849 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3850 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003851 for (uint32_t n = 1; n <= 16; n++) {
3852 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003853 GemmMicrokernelTester()
3854 .mr(6)
3855 .nr(16)
3856 .kr(1)
3857 .sr(1)
3858 .m(m)
3859 .n(n)
3860 .k(k)
3861 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003862 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003863 }
3864 }
3865 }
3866 }
3867
3868 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_div_4) {
3869 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3870 for (size_t k = 8; k <= 40; k += 4) {
3871 GemmMicrokernelTester()
3872 .mr(6)
3873 .nr(16)
3874 .kr(1)
3875 .sr(1)
3876 .m(6)
3877 .n(16)
3878 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003879 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003880 }
3881 }
3882
3883 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_div_4_strided_a) {
3884 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3885 for (size_t k = 8; k <= 40; k += 4) {
3886 GemmMicrokernelTester()
3887 .mr(6)
3888 .nr(16)
3889 .kr(1)
3890 .sr(1)
3891 .m(6)
3892 .n(16)
3893 .k(k)
3894 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003895 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003896 }
3897 }
3898
3899 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_div_4_subtile) {
3900 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3901 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003902 for (uint32_t n = 1; n <= 16; n++) {
3903 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003904 GemmMicrokernelTester()
3905 .mr(6)
3906 .nr(16)
3907 .kr(1)
3908 .sr(1)
3909 .m(m)
3910 .n(n)
3911 .k(k)
3912 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003913 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003914 }
3915 }
3916 }
3917 }
3918
3919 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_gt_16) {
3920 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3921 for (uint32_t n = 17; n < 32; n++) {
3922 for (size_t k = 1; k <= 20; k += 5) {
3923 GemmMicrokernelTester()
3924 .mr(6)
3925 .nr(16)
3926 .kr(1)
3927 .sr(1)
3928 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003929 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003930 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003931 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003932 }
3933 }
3934 }
3935
3936 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_gt_16_strided_cn) {
3937 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3938 for (uint32_t n = 17; n < 32; n++) {
3939 for (size_t k = 1; k <= 20; k += 5) {
3940 GemmMicrokernelTester()
3941 .mr(6)
3942 .nr(16)
3943 .kr(1)
3944 .sr(1)
3945 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003946 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003947 .k(k)
3948 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003949 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003950 }
3951 }
3952 }
3953
3954 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_gt_16_strided_a) {
3955 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3956 for (uint32_t n = 17; n < 32; n++) {
3957 for (size_t k = 1; k <= 20; k += 5) {
3958 GemmMicrokernelTester()
3959 .mr(6)
3960 .nr(16)
3961 .kr(1)
3962 .sr(1)
3963 .m(6)
3964 .n(n)
3965 .k(k)
3966 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003967 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003968 }
3969 }
3970 }
3971
3972 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_gt_16_subtile) {
3973 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3974 for (uint32_t n = 17; n < 32; n++) {
3975 for (size_t k = 1; k <= 20; k += 5) {
3976 for (uint32_t m = 1; m <= 6; m++) {
3977 GemmMicrokernelTester()
3978 .mr(6)
3979 .nr(16)
3980 .kr(1)
3981 .sr(1)
3982 .m(m)
3983 .n(n)
3984 .k(k)
3985 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003986 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003987 }
3988 }
3989 }
3990 }
3991
3992 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_div_16) {
3993 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3994 for (uint32_t n = 32; n <= 48; n += 16) {
3995 for (size_t k = 1; k <= 20; k += 5) {
3996 GemmMicrokernelTester()
3997 .mr(6)
3998 .nr(16)
3999 .kr(1)
4000 .sr(1)
4001 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004002 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004003 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004004 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004005 }
4006 }
4007 }
4008
4009 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_div_16_strided_cn) {
4010 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4011 for (uint32_t n = 32; n <= 48; n += 16) {
4012 for (size_t k = 1; k <= 20; k += 5) {
4013 GemmMicrokernelTester()
4014 .mr(6)
4015 .nr(16)
4016 .kr(1)
4017 .sr(1)
4018 .m(6)
4019 .n(n)
4020 .k(k)
4021 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004022 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004023 }
4024 }
4025 }
4026
4027 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_div_16_strided_a) {
4028 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4029 for (uint32_t n = 32; n <= 48; n += 16) {
4030 for (size_t k = 1; k <= 20; k += 5) {
4031 GemmMicrokernelTester()
4032 .mr(6)
4033 .nr(16)
4034 .kr(1)
4035 .sr(1)
4036 .m(6)
4037 .n(n)
4038 .k(k)
4039 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004040 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004041 }
4042 }
4043 }
4044
4045 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_div_16_subtile) {
4046 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4047 for (uint32_t n = 32; n <= 48; n += 16) {
4048 for (size_t k = 1; k <= 20; k += 5) {
4049 for (uint32_t m = 1; m <= 6; m++) {
4050 GemmMicrokernelTester()
4051 .mr(6)
4052 .nr(16)
4053 .kr(1)
4054 .sr(1)
4055 .m(m)
4056 .n(n)
4057 .k(k)
4058 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004059 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004060 }
4061 }
4062 }
4063 }
4064
4065 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, strided_cm_subtile) {
4066 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4067 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004068 for (uint32_t n = 1; n <= 16; n++) {
4069 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004070 GemmMicrokernelTester()
4071 .mr(6)
4072 .nr(16)
4073 .kr(1)
4074 .sr(1)
4075 .m(m)
4076 .n(n)
4077 .k(k)
4078 .cm_stride(19)
4079 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004080 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004081 }
4082 }
4083 }
4084 }
4085
4086 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, qmin) {
4087 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4088 GemmMicrokernelTester()
4089 .mr(6)
4090 .nr(16)
4091 .kr(1)
4092 .sr(1)
4093 .m(6)
4094 .n(16)
4095 .k(4)
4096 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004097 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004098 }
4099
4100 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, qmax) {
4101 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4102 GemmMicrokernelTester()
4103 .mr(6)
4104 .nr(16)
4105 .kr(1)
4106 .sr(1)
4107 .m(6)
4108 .n(16)
4109 .k(4)
4110 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004111 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004112 }
4113
4114 TEST(F16_GEMM_MINMAX_6X16__NEONFP16ARITH_LD64, strided_cm) {
4115 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4116 GemmMicrokernelTester()
4117 .mr(6)
4118 .nr(16)
4119 .kr(1)
4120 .sr(1)
4121 .m(6)
4122 .n(16)
4123 .k(4)
4124 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004125 .Test(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004126 }
4127#endif // XNN_ARCH_ARM64
4128
4129
4130#if XNN_ARCH_ARM64
4131 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_eq_4) {
4132 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4133 GemmMicrokernelTester()
4134 .mr(8)
4135 .nr(16)
4136 .kr(1)
4137 .sr(1)
4138 .m(8)
4139 .n(16)
4140 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004141 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004142 }
4143
4144 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, strided_cn) {
4145 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4146 GemmMicrokernelTester()
4147 .mr(8)
4148 .nr(16)
4149 .kr(1)
4150 .sr(1)
4151 .m(8)
4152 .n(16)
4153 .k(4)
4154 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004155 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004156 }
4157
4158 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_eq_4_strided_a) {
4159 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4160 GemmMicrokernelTester()
4161 .mr(8)
4162 .nr(16)
4163 .kr(1)
4164 .sr(1)
4165 .m(8)
4166 .n(16)
4167 .k(4)
4168 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004169 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004170 }
4171
4172 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_eq_4_subtile) {
4173 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004174 for (uint32_t n = 1; n <= 16; n++) {
4175 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004176 GemmMicrokernelTester()
4177 .mr(8)
4178 .nr(16)
4179 .kr(1)
4180 .sr(1)
4181 .m(m)
4182 .n(n)
4183 .k(4)
4184 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004185 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004186 }
4187 }
4188 }
4189
4190 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
4191 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4192 for (uint32_t m = 1; m <= 8; m++) {
4193 GemmMicrokernelTester()
4194 .mr(8)
4195 .nr(16)
4196 .kr(1)
4197 .sr(1)
4198 .m(m)
4199 .n(16)
4200 .k(4)
4201 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004202 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004203 }
4204 }
4205
4206 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
4207 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4208 for (uint32_t n = 1; n <= 16; n++) {
4209 GemmMicrokernelTester()
4210 .mr(8)
4211 .nr(16)
4212 .kr(1)
4213 .sr(1)
4214 .m(8)
4215 .n(n)
4216 .k(4)
4217 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004218 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004219 }
4220 }
4221
4222 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_lt_4) {
4223 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4224 for (size_t k = 1; k < 4; k++) {
4225 GemmMicrokernelTester()
4226 .mr(8)
4227 .nr(16)
4228 .kr(1)
4229 .sr(1)
4230 .m(8)
4231 .n(16)
4232 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004233 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004234 }
4235 }
4236
4237 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_lt_4_strided_a) {
4238 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4239 for (size_t k = 1; k < 4; k++) {
4240 GemmMicrokernelTester()
4241 .mr(8)
4242 .nr(16)
4243 .kr(1)
4244 .sr(1)
4245 .m(8)
4246 .n(16)
4247 .k(k)
4248 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004249 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004250 }
4251 }
4252
4253 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_lt_4_subtile) {
4254 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4255 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004256 for (uint32_t n = 1; n <= 16; n++) {
4257 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004258 GemmMicrokernelTester()
4259 .mr(8)
4260 .nr(16)
4261 .kr(1)
4262 .sr(1)
4263 .m(m)
4264 .n(n)
4265 .k(k)
4266 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004267 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004268 }
4269 }
4270 }
4271 }
4272
4273 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_gt_4) {
4274 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4275 for (size_t k = 5; k < 8; k++) {
4276 GemmMicrokernelTester()
4277 .mr(8)
4278 .nr(16)
4279 .kr(1)
4280 .sr(1)
4281 .m(8)
4282 .n(16)
4283 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004284 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004285 }
4286 }
4287
4288 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
4289 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4290 for (size_t k = 5; k < 8; k++) {
4291 GemmMicrokernelTester()
4292 .mr(8)
4293 .nr(16)
4294 .kr(1)
4295 .sr(1)
4296 .m(8)
4297 .n(16)
4298 .k(k)
4299 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004300 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004301 }
4302 }
4303
4304 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_gt_4_subtile) {
4305 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4306 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004307 for (uint32_t n = 1; n <= 16; n++) {
4308 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004309 GemmMicrokernelTester()
4310 .mr(8)
4311 .nr(16)
4312 .kr(1)
4313 .sr(1)
4314 .m(m)
4315 .n(n)
4316 .k(k)
4317 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004318 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004319 }
4320 }
4321 }
4322 }
4323
4324 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_div_4) {
4325 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4326 for (size_t k = 8; k <= 40; k += 4) {
4327 GemmMicrokernelTester()
4328 .mr(8)
4329 .nr(16)
4330 .kr(1)
4331 .sr(1)
4332 .m(8)
4333 .n(16)
4334 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004335 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004336 }
4337 }
4338
4339 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_div_4_strided_a) {
4340 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4341 for (size_t k = 8; k <= 40; k += 4) {
4342 GemmMicrokernelTester()
4343 .mr(8)
4344 .nr(16)
4345 .kr(1)
4346 .sr(1)
4347 .m(8)
4348 .n(16)
4349 .k(k)
4350 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004351 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004352 }
4353 }
4354
4355 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_div_4_subtile) {
4356 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4357 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004358 for (uint32_t n = 1; n <= 16; n++) {
4359 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004360 GemmMicrokernelTester()
4361 .mr(8)
4362 .nr(16)
4363 .kr(1)
4364 .sr(1)
4365 .m(m)
4366 .n(n)
4367 .k(k)
4368 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004369 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004370 }
4371 }
4372 }
4373 }
4374
4375 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_gt_16) {
4376 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4377 for (uint32_t n = 17; n < 32; n++) {
4378 for (size_t k = 1; k <= 20; k += 5) {
4379 GemmMicrokernelTester()
4380 .mr(8)
4381 .nr(16)
4382 .kr(1)
4383 .sr(1)
4384 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004385 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004386 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004387 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004388 }
4389 }
4390 }
4391
4392 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_gt_16_strided_cn) {
4393 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4394 for (uint32_t n = 17; n < 32; n++) {
4395 for (size_t k = 1; k <= 20; k += 5) {
4396 GemmMicrokernelTester()
4397 .mr(8)
4398 .nr(16)
4399 .kr(1)
4400 .sr(1)
4401 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004402 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004403 .k(k)
4404 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004405 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004406 }
4407 }
4408 }
4409
4410 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_gt_16_strided_a) {
4411 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4412 for (uint32_t n = 17; n < 32; n++) {
4413 for (size_t k = 1; k <= 20; k += 5) {
4414 GemmMicrokernelTester()
4415 .mr(8)
4416 .nr(16)
4417 .kr(1)
4418 .sr(1)
4419 .m(8)
4420 .n(n)
4421 .k(k)
4422 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004423 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004424 }
4425 }
4426 }
4427
4428 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_gt_16_subtile) {
4429 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4430 for (uint32_t n = 17; n < 32; n++) {
4431 for (size_t k = 1; k <= 20; k += 5) {
4432 for (uint32_t m = 1; m <= 8; m++) {
4433 GemmMicrokernelTester()
4434 .mr(8)
4435 .nr(16)
4436 .kr(1)
4437 .sr(1)
4438 .m(m)
4439 .n(n)
4440 .k(k)
4441 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004442 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004443 }
4444 }
4445 }
4446 }
4447
4448 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_div_16) {
4449 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4450 for (uint32_t n = 32; n <= 48; n += 16) {
4451 for (size_t k = 1; k <= 20; k += 5) {
4452 GemmMicrokernelTester()
4453 .mr(8)
4454 .nr(16)
4455 .kr(1)
4456 .sr(1)
4457 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004458 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004459 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004460 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004461 }
4462 }
4463 }
4464
4465 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_div_16_strided_cn) {
4466 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4467 for (uint32_t n = 32; n <= 48; n += 16) {
4468 for (size_t k = 1; k <= 20; k += 5) {
4469 GemmMicrokernelTester()
4470 .mr(8)
4471 .nr(16)
4472 .kr(1)
4473 .sr(1)
4474 .m(8)
4475 .n(n)
4476 .k(k)
4477 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004478 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004479 }
4480 }
4481 }
4482
4483 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_div_16_strided_a) {
4484 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4485 for (uint32_t n = 32; n <= 48; n += 16) {
4486 for (size_t k = 1; k <= 20; k += 5) {
4487 GemmMicrokernelTester()
4488 .mr(8)
4489 .nr(16)
4490 .kr(1)
4491 .sr(1)
4492 .m(8)
4493 .n(n)
4494 .k(k)
4495 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004496 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004497 }
4498 }
4499 }
4500
4501 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_div_16_subtile) {
4502 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4503 for (uint32_t n = 32; n <= 48; n += 16) {
4504 for (size_t k = 1; k <= 20; k += 5) {
4505 for (uint32_t m = 1; m <= 8; m++) {
4506 GemmMicrokernelTester()
4507 .mr(8)
4508 .nr(16)
4509 .kr(1)
4510 .sr(1)
4511 .m(m)
4512 .n(n)
4513 .k(k)
4514 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004515 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004516 }
4517 }
4518 }
4519 }
4520
4521 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, strided_cm_subtile) {
4522 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4523 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004524 for (uint32_t n = 1; n <= 16; n++) {
4525 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004526 GemmMicrokernelTester()
4527 .mr(8)
4528 .nr(16)
4529 .kr(1)
4530 .sr(1)
4531 .m(m)
4532 .n(n)
4533 .k(k)
4534 .cm_stride(19)
4535 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004536 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004537 }
4538 }
4539 }
4540 }
4541
4542 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, qmin) {
4543 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4544 GemmMicrokernelTester()
4545 .mr(8)
4546 .nr(16)
4547 .kr(1)
4548 .sr(1)
4549 .m(8)
4550 .n(16)
4551 .k(4)
4552 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004553 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004554 }
4555
4556 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, qmax) {
4557 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4558 GemmMicrokernelTester()
4559 .mr(8)
4560 .nr(16)
4561 .kr(1)
4562 .sr(1)
4563 .m(8)
4564 .n(16)
4565 .k(4)
4566 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004567 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004568 }
4569
4570 TEST(F16_GEMM_MINMAX_8X16__NEONFP16ARITH_LD64, strided_cm) {
4571 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4572 GemmMicrokernelTester()
4573 .mr(8)
4574 .nr(16)
4575 .kr(1)
4576 .sr(1)
4577 .m(8)
4578 .n(16)
4579 .k(4)
4580 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004581 .Test(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07004582 }
4583#endif // XNN_ARCH_ARM64
4584
4585
Frank Barcharde4d3f762021-12-23 15:31:43 -08004586#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard36b76b62020-04-10 12:39:17 -07004587 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2) {
4588 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4589 GemmMicrokernelTester()
4590 .mr(1)
4591 .nr(16)
4592 .kr(1)
4593 .sr(1)
4594 .m(1)
4595 .n(16)
4596 .k(2)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004597 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004598 }
4599
4600 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, strided_cn) {
4601 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4602 GemmMicrokernelTester()
4603 .mr(1)
4604 .nr(16)
4605 .kr(1)
4606 .sr(1)
4607 .m(1)
4608 .n(16)
4609 .k(2)
4610 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004611 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004612 }
4613
4614 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_strided_a) {
4615 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4616 GemmMicrokernelTester()
4617 .mr(1)
4618 .nr(16)
4619 .kr(1)
4620 .sr(1)
4621 .m(1)
4622 .n(16)
4623 .k(2)
4624 .a_stride(5)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004625 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004626 }
4627
4628 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile) {
4629 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004630 for (uint32_t n = 1; n <= 16; n++) {
4631 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard36b76b62020-04-10 12:39:17 -07004632 GemmMicrokernelTester()
4633 .mr(1)
4634 .nr(16)
4635 .kr(1)
4636 .sr(1)
4637 .m(m)
4638 .n(n)
4639 .k(2)
4640 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004641 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004642 }
4643 }
4644 }
4645
4646 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_m) {
4647 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4648 for (uint32_t m = 1; m <= 1; m++) {
4649 GemmMicrokernelTester()
4650 .mr(1)
4651 .nr(16)
4652 .kr(1)
4653 .sr(1)
4654 .m(m)
4655 .n(16)
4656 .k(2)
4657 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004658 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004659 }
4660 }
4661
4662 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_n) {
4663 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4664 for (uint32_t n = 1; n <= 16; n++) {
4665 GemmMicrokernelTester()
4666 .mr(1)
4667 .nr(16)
4668 .kr(1)
4669 .sr(1)
4670 .m(1)
4671 .n(n)
4672 .k(2)
4673 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004674 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004675 }
4676 }
4677
4678 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2) {
4679 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4680 for (size_t k = 1; k < 2; k++) {
4681 GemmMicrokernelTester()
4682 .mr(1)
4683 .nr(16)
4684 .kr(1)
4685 .sr(1)
4686 .m(1)
4687 .n(16)
4688 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004689 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004690 }
4691 }
4692
4693 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_strided_a) {
4694 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4695 for (size_t k = 1; k < 2; k++) {
4696 GemmMicrokernelTester()
4697 .mr(1)
4698 .nr(16)
4699 .kr(1)
4700 .sr(1)
4701 .m(1)
4702 .n(16)
4703 .k(k)
4704 .a_stride(5)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004705 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004706 }
4707 }
4708
4709 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_subtile) {
4710 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4711 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004712 for (uint32_t n = 1; n <= 16; n++) {
4713 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard36b76b62020-04-10 12:39:17 -07004714 GemmMicrokernelTester()
4715 .mr(1)
4716 .nr(16)
4717 .kr(1)
4718 .sr(1)
4719 .m(m)
4720 .n(n)
4721 .k(k)
4722 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004723 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004724 }
4725 }
4726 }
4727 }
4728
4729 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2) {
4730 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4731 for (size_t k = 3; k < 4; k++) {
4732 GemmMicrokernelTester()
4733 .mr(1)
4734 .nr(16)
4735 .kr(1)
4736 .sr(1)
4737 .m(1)
4738 .n(16)
4739 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004740 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004741 }
4742 }
4743
4744 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_strided_a) {
4745 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4746 for (size_t k = 3; k < 4; k++) {
4747 GemmMicrokernelTester()
4748 .mr(1)
4749 .nr(16)
4750 .kr(1)
4751 .sr(1)
4752 .m(1)
4753 .n(16)
4754 .k(k)
4755 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004756 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004757 }
4758 }
4759
4760 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_subtile) {
4761 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4762 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004763 for (uint32_t n = 1; n <= 16; n++) {
4764 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard36b76b62020-04-10 12:39:17 -07004765 GemmMicrokernelTester()
4766 .mr(1)
4767 .nr(16)
4768 .kr(1)
4769 .sr(1)
4770 .m(m)
4771 .n(n)
4772 .k(k)
4773 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004774 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004775 }
4776 }
4777 }
4778 }
4779
4780 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_div_2) {
4781 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4782 for (size_t k = 4; k <= 20; k += 2) {
4783 GemmMicrokernelTester()
4784 .mr(1)
4785 .nr(16)
4786 .kr(1)
4787 .sr(1)
4788 .m(1)
4789 .n(16)
4790 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004791 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004792 }
4793 }
4794
4795 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_strided_a) {
4796 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4797 for (size_t k = 4; k <= 20; k += 2) {
4798 GemmMicrokernelTester()
4799 .mr(1)
4800 .nr(16)
4801 .kr(1)
4802 .sr(1)
4803 .m(1)
4804 .n(16)
4805 .k(k)
4806 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004807 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004808 }
4809 }
4810
4811 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_subtile) {
4812 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4813 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004814 for (uint32_t n = 1; n <= 16; n++) {
4815 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard36b76b62020-04-10 12:39:17 -07004816 GemmMicrokernelTester()
4817 .mr(1)
4818 .nr(16)
4819 .kr(1)
4820 .sr(1)
4821 .m(m)
4822 .n(n)
4823 .k(k)
4824 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004825 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004826 }
4827 }
4828 }
4829 }
4830
4831 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16) {
4832 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4833 for (uint32_t n = 17; n < 32; n++) {
4834 for (size_t k = 1; k <= 10; k += 3) {
4835 GemmMicrokernelTester()
4836 .mr(1)
4837 .nr(16)
4838 .kr(1)
4839 .sr(1)
4840 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004841 .n(n)
Frank Barchard36b76b62020-04-10 12:39:17 -07004842 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004843 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004844 }
4845 }
4846 }
4847
4848 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_cn) {
4849 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4850 for (uint32_t n = 17; n < 32; n++) {
4851 for (size_t k = 1; k <= 10; k += 3) {
4852 GemmMicrokernelTester()
4853 .mr(1)
4854 .nr(16)
4855 .kr(1)
4856 .sr(1)
4857 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004858 .n(n)
Frank Barchard36b76b62020-04-10 12:39:17 -07004859 .k(k)
4860 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004861 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004862 }
4863 }
4864 }
4865
4866 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_a) {
4867 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4868 for (uint32_t n = 17; n < 32; n++) {
4869 for (size_t k = 1; k <= 10; k += 3) {
4870 GemmMicrokernelTester()
4871 .mr(1)
4872 .nr(16)
4873 .kr(1)
4874 .sr(1)
4875 .m(1)
4876 .n(n)
4877 .k(k)
4878 .a_stride(13)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004879 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004880 }
4881 }
4882 }
4883
4884 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_subtile) {
4885 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4886 for (uint32_t n = 17; n < 32; n++) {
4887 for (size_t k = 1; k <= 10; k += 3) {
4888 for (uint32_t m = 1; m <= 1; m++) {
4889 GemmMicrokernelTester()
4890 .mr(1)
4891 .nr(16)
4892 .kr(1)
4893 .sr(1)
4894 .m(m)
4895 .n(n)
4896 .k(k)
4897 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004898 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004899 }
4900 }
4901 }
4902 }
4903
4904 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_div_16) {
4905 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4906 for (uint32_t n = 32; n <= 48; n += 16) {
4907 for (size_t k = 1; k <= 10; k += 3) {
4908 GemmMicrokernelTester()
4909 .mr(1)
4910 .nr(16)
4911 .kr(1)
4912 .sr(1)
4913 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004914 .n(n)
Frank Barchard36b76b62020-04-10 12:39:17 -07004915 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004916 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004917 }
4918 }
4919 }
4920
4921 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_cn) {
4922 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4923 for (uint32_t n = 32; n <= 48; n += 16) {
4924 for (size_t k = 1; k <= 10; k += 3) {
4925 GemmMicrokernelTester()
4926 .mr(1)
4927 .nr(16)
4928 .kr(1)
4929 .sr(1)
4930 .m(1)
4931 .n(n)
4932 .k(k)
4933 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004934 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004935 }
4936 }
4937 }
4938
4939 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_a) {
4940 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4941 for (uint32_t n = 32; n <= 48; n += 16) {
4942 for (size_t k = 1; k <= 10; k += 3) {
4943 GemmMicrokernelTester()
4944 .mr(1)
4945 .nr(16)
4946 .kr(1)
4947 .sr(1)
4948 .m(1)
4949 .n(n)
4950 .k(k)
4951 .a_stride(13)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004952 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004953 }
4954 }
4955 }
4956
4957 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_subtile) {
4958 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4959 for (uint32_t n = 32; n <= 48; n += 16) {
4960 for (size_t k = 1; k <= 10; k += 3) {
4961 for (uint32_t m = 1; m <= 1; m++) {
4962 GemmMicrokernelTester()
4963 .mr(1)
4964 .nr(16)
4965 .kr(1)
4966 .sr(1)
4967 .m(m)
4968 .n(n)
4969 .k(k)
4970 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004971 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004972 }
4973 }
4974 }
4975 }
4976
4977 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, strided_cm_subtile) {
4978 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
4979 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004980 for (uint32_t n = 1; n <= 16; n++) {
4981 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard36b76b62020-04-10 12:39:17 -07004982 GemmMicrokernelTester()
4983 .mr(1)
4984 .nr(16)
4985 .kr(1)
4986 .sr(1)
4987 .m(m)
4988 .n(n)
4989 .k(k)
4990 .cm_stride(19)
4991 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004992 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07004993 }
4994 }
4995 }
4996 }
4997
4998 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, qmin) {
4999 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5000 GemmMicrokernelTester()
5001 .mr(1)
5002 .nr(16)
5003 .kr(1)
5004 .sr(1)
5005 .m(1)
5006 .n(16)
5007 .k(2)
5008 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005009 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07005010 }
5011
5012 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, qmax) {
5013 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5014 GemmMicrokernelTester()
5015 .mr(1)
5016 .nr(16)
5017 .kr(1)
5018 .sr(1)
5019 .m(1)
5020 .n(16)
5021 .k(2)
5022 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005023 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07005024 }
5025
5026 TEST(F16_GEMM_MINMAX_1X16__AARCH64_NEONFP16ARITH_LD32, strided_cm) {
5027 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5028 GemmMicrokernelTester()
5029 .mr(1)
5030 .nr(16)
5031 .kr(1)
5032 .sr(1)
5033 .m(1)
5034 .n(16)
5035 .k(2)
5036 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005037 .Test(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard36b76b62020-04-10 12:39:17 -07005038 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08005039#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard36b76b62020-04-10 12:39:17 -07005040
5041
Frank Barcharde4d3f762021-12-23 15:31:43 -08005042#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard683f5592020-04-10 00:48:26 -07005043 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2) {
5044 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5045 GemmMicrokernelTester()
5046 .mr(4)
5047 .nr(16)
5048 .kr(1)
5049 .sr(1)
5050 .m(4)
5051 .n(16)
5052 .k(2)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005053 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005054 }
5055
5056 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, strided_cn) {
5057 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5058 GemmMicrokernelTester()
5059 .mr(4)
5060 .nr(16)
5061 .kr(1)
5062 .sr(1)
5063 .m(4)
5064 .n(16)
5065 .k(2)
5066 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005067 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005068 }
5069
5070 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_strided_a) {
5071 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5072 GemmMicrokernelTester()
5073 .mr(4)
5074 .nr(16)
5075 .kr(1)
5076 .sr(1)
5077 .m(4)
5078 .n(16)
5079 .k(2)
5080 .a_stride(5)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005081 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005082 }
5083
5084 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile) {
5085 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005086 for (uint32_t n = 1; n <= 16; n++) {
5087 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard683f5592020-04-10 00:48:26 -07005088 GemmMicrokernelTester()
5089 .mr(4)
5090 .nr(16)
5091 .kr(1)
5092 .sr(1)
5093 .m(m)
5094 .n(n)
5095 .k(2)
5096 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005097 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005098 }
5099 }
5100 }
5101
5102 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_m) {
5103 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5104 for (uint32_t m = 1; m <= 4; m++) {
5105 GemmMicrokernelTester()
5106 .mr(4)
5107 .nr(16)
5108 .kr(1)
5109 .sr(1)
5110 .m(m)
5111 .n(16)
5112 .k(2)
5113 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005114 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005115 }
5116 }
5117
5118 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_n) {
5119 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5120 for (uint32_t n = 1; n <= 16; n++) {
5121 GemmMicrokernelTester()
5122 .mr(4)
5123 .nr(16)
5124 .kr(1)
5125 .sr(1)
5126 .m(4)
5127 .n(n)
5128 .k(2)
5129 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005130 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005131 }
5132 }
5133
5134 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2) {
5135 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5136 for (size_t k = 1; k < 2; k++) {
5137 GemmMicrokernelTester()
5138 .mr(4)
5139 .nr(16)
5140 .kr(1)
5141 .sr(1)
5142 .m(4)
5143 .n(16)
5144 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005145 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005146 }
5147 }
5148
5149 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_strided_a) {
5150 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5151 for (size_t k = 1; k < 2; k++) {
5152 GemmMicrokernelTester()
5153 .mr(4)
5154 .nr(16)
5155 .kr(1)
5156 .sr(1)
5157 .m(4)
5158 .n(16)
5159 .k(k)
5160 .a_stride(5)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005161 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005162 }
5163 }
5164
5165 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_subtile) {
5166 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5167 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005168 for (uint32_t n = 1; n <= 16; n++) {
5169 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard683f5592020-04-10 00:48:26 -07005170 GemmMicrokernelTester()
5171 .mr(4)
5172 .nr(16)
5173 .kr(1)
5174 .sr(1)
5175 .m(m)
5176 .n(n)
5177 .k(k)
5178 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005179 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005180 }
5181 }
5182 }
5183 }
5184
5185 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2) {
5186 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5187 for (size_t k = 3; k < 4; k++) {
5188 GemmMicrokernelTester()
5189 .mr(4)
5190 .nr(16)
5191 .kr(1)
5192 .sr(1)
5193 .m(4)
5194 .n(16)
5195 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005196 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005197 }
5198 }
5199
5200 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_strided_a) {
5201 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5202 for (size_t k = 3; k < 4; k++) {
5203 GemmMicrokernelTester()
5204 .mr(4)
5205 .nr(16)
5206 .kr(1)
5207 .sr(1)
5208 .m(4)
5209 .n(16)
5210 .k(k)
5211 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005212 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005213 }
5214 }
5215
5216 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_subtile) {
5217 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5218 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005219 for (uint32_t n = 1; n <= 16; n++) {
5220 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard683f5592020-04-10 00:48:26 -07005221 GemmMicrokernelTester()
5222 .mr(4)
5223 .nr(16)
5224 .kr(1)
5225 .sr(1)
5226 .m(m)
5227 .n(n)
5228 .k(k)
5229 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005230 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005231 }
5232 }
5233 }
5234 }
5235
5236 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_div_2) {
5237 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5238 for (size_t k = 4; k <= 20; k += 2) {
5239 GemmMicrokernelTester()
5240 .mr(4)
5241 .nr(16)
5242 .kr(1)
5243 .sr(1)
5244 .m(4)
5245 .n(16)
5246 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005247 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005248 }
5249 }
5250
5251 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_strided_a) {
5252 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5253 for (size_t k = 4; k <= 20; k += 2) {
5254 GemmMicrokernelTester()
5255 .mr(4)
5256 .nr(16)
5257 .kr(1)
5258 .sr(1)
5259 .m(4)
5260 .n(16)
5261 .k(k)
5262 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005263 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005264 }
5265 }
5266
5267 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_subtile) {
5268 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5269 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005270 for (uint32_t n = 1; n <= 16; n++) {
5271 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard683f5592020-04-10 00:48:26 -07005272 GemmMicrokernelTester()
5273 .mr(4)
5274 .nr(16)
5275 .kr(1)
5276 .sr(1)
5277 .m(m)
5278 .n(n)
5279 .k(k)
5280 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005281 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005282 }
5283 }
5284 }
5285 }
5286
5287 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16) {
5288 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5289 for (uint32_t n = 17; n < 32; n++) {
5290 for (size_t k = 1; k <= 10; k += 3) {
5291 GemmMicrokernelTester()
5292 .mr(4)
5293 .nr(16)
5294 .kr(1)
5295 .sr(1)
5296 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005297 .n(n)
Frank Barchard683f5592020-04-10 00:48:26 -07005298 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005299 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005300 }
5301 }
5302 }
5303
5304 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_cn) {
5305 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5306 for (uint32_t n = 17; n < 32; n++) {
5307 for (size_t k = 1; k <= 10; k += 3) {
5308 GemmMicrokernelTester()
5309 .mr(4)
5310 .nr(16)
5311 .kr(1)
5312 .sr(1)
5313 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005314 .n(n)
Frank Barchard683f5592020-04-10 00:48:26 -07005315 .k(k)
5316 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005317 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005318 }
5319 }
5320 }
5321
5322 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_a) {
5323 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5324 for (uint32_t n = 17; n < 32; n++) {
5325 for (size_t k = 1; k <= 10; k += 3) {
5326 GemmMicrokernelTester()
5327 .mr(4)
5328 .nr(16)
5329 .kr(1)
5330 .sr(1)
5331 .m(4)
5332 .n(n)
5333 .k(k)
5334 .a_stride(13)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005335 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005336 }
5337 }
5338 }
5339
5340 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_subtile) {
5341 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5342 for (uint32_t n = 17; n < 32; n++) {
5343 for (size_t k = 1; k <= 10; k += 3) {
5344 for (uint32_t m = 1; m <= 4; m++) {
5345 GemmMicrokernelTester()
5346 .mr(4)
5347 .nr(16)
5348 .kr(1)
5349 .sr(1)
5350 .m(m)
5351 .n(n)
5352 .k(k)
5353 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005354 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005355 }
5356 }
5357 }
5358 }
5359
5360 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_div_16) {
5361 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5362 for (uint32_t n = 32; n <= 48; n += 16) {
5363 for (size_t k = 1; k <= 10; k += 3) {
5364 GemmMicrokernelTester()
5365 .mr(4)
5366 .nr(16)
5367 .kr(1)
5368 .sr(1)
5369 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005370 .n(n)
Frank Barchard683f5592020-04-10 00:48:26 -07005371 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005372 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005373 }
5374 }
5375 }
5376
5377 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_cn) {
5378 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5379 for (uint32_t n = 32; n <= 48; n += 16) {
5380 for (size_t k = 1; k <= 10; k += 3) {
5381 GemmMicrokernelTester()
5382 .mr(4)
5383 .nr(16)
5384 .kr(1)
5385 .sr(1)
5386 .m(4)
5387 .n(n)
5388 .k(k)
5389 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005390 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005391 }
5392 }
5393 }
5394
5395 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_a) {
5396 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5397 for (uint32_t n = 32; n <= 48; n += 16) {
5398 for (size_t k = 1; k <= 10; k += 3) {
5399 GemmMicrokernelTester()
5400 .mr(4)
5401 .nr(16)
5402 .kr(1)
5403 .sr(1)
5404 .m(4)
5405 .n(n)
5406 .k(k)
5407 .a_stride(13)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005408 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005409 }
5410 }
5411 }
5412
5413 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_subtile) {
5414 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5415 for (uint32_t n = 32; n <= 48; n += 16) {
5416 for (size_t k = 1; k <= 10; k += 3) {
5417 for (uint32_t m = 1; m <= 4; m++) {
5418 GemmMicrokernelTester()
5419 .mr(4)
5420 .nr(16)
5421 .kr(1)
5422 .sr(1)
5423 .m(m)
5424 .n(n)
5425 .k(k)
5426 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005427 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005428 }
5429 }
5430 }
5431 }
5432
5433 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, strided_cm_subtile) {
5434 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5435 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005436 for (uint32_t n = 1; n <= 16; n++) {
5437 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard683f5592020-04-10 00:48:26 -07005438 GemmMicrokernelTester()
5439 .mr(4)
5440 .nr(16)
5441 .kr(1)
5442 .sr(1)
5443 .m(m)
5444 .n(n)
5445 .k(k)
5446 .cm_stride(19)
5447 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005448 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005449 }
5450 }
5451 }
5452 }
5453
5454 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, qmin) {
5455 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5456 GemmMicrokernelTester()
5457 .mr(4)
5458 .nr(16)
5459 .kr(1)
5460 .sr(1)
5461 .m(4)
5462 .n(16)
5463 .k(2)
5464 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005465 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005466 }
5467
5468 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, qmax) {
5469 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5470 GemmMicrokernelTester()
5471 .mr(4)
5472 .nr(16)
5473 .kr(1)
5474 .sr(1)
5475 .m(4)
5476 .n(16)
5477 .k(2)
5478 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005479 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005480 }
5481
5482 TEST(F16_GEMM_MINMAX_4X16__AARCH64_NEONFP16ARITH_LD32, strided_cm) {
5483 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5484 GemmMicrokernelTester()
5485 .mr(4)
5486 .nr(16)
5487 .kr(1)
5488 .sr(1)
5489 .m(4)
5490 .n(16)
5491 .k(2)
5492 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005493 .Test(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005494 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08005495#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard683f5592020-04-10 00:48:26 -07005496
5497
Frank Barcharde4d3f762021-12-23 15:31:43 -08005498#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard683f5592020-04-10 00:48:26 -07005499 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2) {
5500 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5501 GemmMicrokernelTester()
5502 .mr(6)
5503 .nr(16)
5504 .kr(1)
5505 .sr(1)
5506 .m(6)
5507 .n(16)
5508 .k(2)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005509 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005510 }
5511
5512 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, strided_cn) {
5513 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5514 GemmMicrokernelTester()
5515 .mr(6)
5516 .nr(16)
5517 .kr(1)
5518 .sr(1)
5519 .m(6)
5520 .n(16)
5521 .k(2)
5522 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005523 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005524 }
5525
5526 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_strided_a) {
5527 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5528 GemmMicrokernelTester()
5529 .mr(6)
5530 .nr(16)
5531 .kr(1)
5532 .sr(1)
5533 .m(6)
5534 .n(16)
5535 .k(2)
5536 .a_stride(5)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005537 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005538 }
5539
5540 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile) {
5541 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005542 for (uint32_t n = 1; n <= 16; n++) {
5543 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard683f5592020-04-10 00:48:26 -07005544 GemmMicrokernelTester()
5545 .mr(6)
5546 .nr(16)
5547 .kr(1)
5548 .sr(1)
5549 .m(m)
5550 .n(n)
5551 .k(2)
5552 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005553 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005554 }
5555 }
5556 }
5557
5558 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_m) {
5559 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5560 for (uint32_t m = 1; m <= 6; m++) {
5561 GemmMicrokernelTester()
5562 .mr(6)
5563 .nr(16)
5564 .kr(1)
5565 .sr(1)
5566 .m(m)
5567 .n(16)
5568 .k(2)
5569 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005570 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005571 }
5572 }
5573
5574 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_eq_2_subtile_n) {
5575 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5576 for (uint32_t n = 1; n <= 16; n++) {
5577 GemmMicrokernelTester()
5578 .mr(6)
5579 .nr(16)
5580 .kr(1)
5581 .sr(1)
5582 .m(6)
5583 .n(n)
5584 .k(2)
5585 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005586 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005587 }
5588 }
5589
5590 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2) {
5591 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5592 for (size_t k = 1; k < 2; k++) {
5593 GemmMicrokernelTester()
5594 .mr(6)
5595 .nr(16)
5596 .kr(1)
5597 .sr(1)
5598 .m(6)
5599 .n(16)
5600 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005601 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005602 }
5603 }
5604
5605 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_strided_a) {
5606 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5607 for (size_t k = 1; k < 2; k++) {
5608 GemmMicrokernelTester()
5609 .mr(6)
5610 .nr(16)
5611 .kr(1)
5612 .sr(1)
5613 .m(6)
5614 .n(16)
5615 .k(k)
5616 .a_stride(5)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005617 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005618 }
5619 }
5620
5621 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_lt_2_subtile) {
5622 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5623 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005624 for (uint32_t n = 1; n <= 16; n++) {
5625 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard683f5592020-04-10 00:48:26 -07005626 GemmMicrokernelTester()
5627 .mr(6)
5628 .nr(16)
5629 .kr(1)
5630 .sr(1)
5631 .m(m)
5632 .n(n)
5633 .k(k)
5634 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005635 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005636 }
5637 }
5638 }
5639 }
5640
5641 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2) {
5642 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5643 for (size_t k = 3; k < 4; k++) {
5644 GemmMicrokernelTester()
5645 .mr(6)
5646 .nr(16)
5647 .kr(1)
5648 .sr(1)
5649 .m(6)
5650 .n(16)
5651 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005652 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005653 }
5654 }
5655
5656 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_strided_a) {
5657 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5658 for (size_t k = 3; k < 4; k++) {
5659 GemmMicrokernelTester()
5660 .mr(6)
5661 .nr(16)
5662 .kr(1)
5663 .sr(1)
5664 .m(6)
5665 .n(16)
5666 .k(k)
5667 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005668 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005669 }
5670 }
5671
5672 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_gt_2_subtile) {
5673 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5674 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005675 for (uint32_t n = 1; n <= 16; n++) {
5676 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard683f5592020-04-10 00:48:26 -07005677 GemmMicrokernelTester()
5678 .mr(6)
5679 .nr(16)
5680 .kr(1)
5681 .sr(1)
5682 .m(m)
5683 .n(n)
5684 .k(k)
5685 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005686 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005687 }
5688 }
5689 }
5690 }
5691
5692 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_div_2) {
5693 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5694 for (size_t k = 4; k <= 20; k += 2) {
5695 GemmMicrokernelTester()
5696 .mr(6)
5697 .nr(16)
5698 .kr(1)
5699 .sr(1)
5700 .m(6)
5701 .n(16)
5702 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005703 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005704 }
5705 }
5706
5707 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_strided_a) {
5708 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5709 for (size_t k = 4; k <= 20; k += 2) {
5710 GemmMicrokernelTester()
5711 .mr(6)
5712 .nr(16)
5713 .kr(1)
5714 .sr(1)
5715 .m(6)
5716 .n(16)
5717 .k(k)
5718 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005719 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005720 }
5721 }
5722
5723 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, k_div_2_subtile) {
5724 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5725 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005726 for (uint32_t n = 1; n <= 16; n++) {
5727 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard683f5592020-04-10 00:48:26 -07005728 GemmMicrokernelTester()
5729 .mr(6)
5730 .nr(16)
5731 .kr(1)
5732 .sr(1)
5733 .m(m)
5734 .n(n)
5735 .k(k)
5736 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005737 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005738 }
5739 }
5740 }
5741 }
5742
5743 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16) {
5744 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5745 for (uint32_t n = 17; n < 32; n++) {
5746 for (size_t k = 1; k <= 10; k += 3) {
5747 GemmMicrokernelTester()
5748 .mr(6)
5749 .nr(16)
5750 .kr(1)
5751 .sr(1)
5752 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005753 .n(n)
Frank Barchard683f5592020-04-10 00:48:26 -07005754 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005755 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005756 }
5757 }
5758 }
5759
5760 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_cn) {
5761 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5762 for (uint32_t n = 17; n < 32; n++) {
5763 for (size_t k = 1; k <= 10; k += 3) {
5764 GemmMicrokernelTester()
5765 .mr(6)
5766 .nr(16)
5767 .kr(1)
5768 .sr(1)
5769 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005770 .n(n)
Frank Barchard683f5592020-04-10 00:48:26 -07005771 .k(k)
5772 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005773 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005774 }
5775 }
5776 }
5777
5778 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_strided_a) {
5779 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5780 for (uint32_t n = 17; n < 32; n++) {
5781 for (size_t k = 1; k <= 10; k += 3) {
5782 GemmMicrokernelTester()
5783 .mr(6)
5784 .nr(16)
5785 .kr(1)
5786 .sr(1)
5787 .m(6)
5788 .n(n)
5789 .k(k)
5790 .a_stride(13)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005791 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005792 }
5793 }
5794 }
5795
5796 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_gt_16_subtile) {
5797 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5798 for (uint32_t n = 17; n < 32; n++) {
5799 for (size_t k = 1; k <= 10; k += 3) {
5800 for (uint32_t m = 1; m <= 6; m++) {
5801 GemmMicrokernelTester()
5802 .mr(6)
5803 .nr(16)
5804 .kr(1)
5805 .sr(1)
5806 .m(m)
5807 .n(n)
5808 .k(k)
5809 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005810 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005811 }
5812 }
5813 }
5814 }
5815
5816 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_div_16) {
5817 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5818 for (uint32_t n = 32; n <= 48; n += 16) {
5819 for (size_t k = 1; k <= 10; k += 3) {
5820 GemmMicrokernelTester()
5821 .mr(6)
5822 .nr(16)
5823 .kr(1)
5824 .sr(1)
5825 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005826 .n(n)
Frank Barchard683f5592020-04-10 00:48:26 -07005827 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005828 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005829 }
5830 }
5831 }
5832
5833 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_cn) {
5834 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5835 for (uint32_t n = 32; n <= 48; n += 16) {
5836 for (size_t k = 1; k <= 10; k += 3) {
5837 GemmMicrokernelTester()
5838 .mr(6)
5839 .nr(16)
5840 .kr(1)
5841 .sr(1)
5842 .m(6)
5843 .n(n)
5844 .k(k)
5845 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005846 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005847 }
5848 }
5849 }
5850
5851 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_strided_a) {
5852 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5853 for (uint32_t n = 32; n <= 48; n += 16) {
5854 for (size_t k = 1; k <= 10; k += 3) {
5855 GemmMicrokernelTester()
5856 .mr(6)
5857 .nr(16)
5858 .kr(1)
5859 .sr(1)
5860 .m(6)
5861 .n(n)
5862 .k(k)
5863 .a_stride(13)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005864 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005865 }
5866 }
5867 }
5868
5869 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, n_div_16_subtile) {
5870 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5871 for (uint32_t n = 32; n <= 48; n += 16) {
5872 for (size_t k = 1; k <= 10; k += 3) {
5873 for (uint32_t m = 1; m <= 6; m++) {
5874 GemmMicrokernelTester()
5875 .mr(6)
5876 .nr(16)
5877 .kr(1)
5878 .sr(1)
5879 .m(m)
5880 .n(n)
5881 .k(k)
5882 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005883 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005884 }
5885 }
5886 }
5887 }
5888
5889 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, strided_cm_subtile) {
5890 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5891 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005892 for (uint32_t n = 1; n <= 16; n++) {
5893 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard683f5592020-04-10 00:48:26 -07005894 GemmMicrokernelTester()
5895 .mr(6)
5896 .nr(16)
5897 .kr(1)
5898 .sr(1)
5899 .m(m)
5900 .n(n)
5901 .k(k)
5902 .cm_stride(19)
5903 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005904 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005905 }
5906 }
5907 }
5908 }
5909
5910 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, qmin) {
5911 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5912 GemmMicrokernelTester()
5913 .mr(6)
5914 .nr(16)
5915 .kr(1)
5916 .sr(1)
5917 .m(6)
5918 .n(16)
5919 .k(2)
5920 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005921 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005922 }
5923
5924 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, qmax) {
5925 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5926 GemmMicrokernelTester()
5927 .mr(6)
5928 .nr(16)
5929 .kr(1)
5930 .sr(1)
5931 .m(6)
5932 .n(16)
5933 .k(2)
5934 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005935 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005936 }
5937
5938 TEST(F16_GEMM_MINMAX_6X16__AARCH64_NEONFP16ARITH_LD32, strided_cm) {
5939 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5940 GemmMicrokernelTester()
5941 .mr(6)
5942 .nr(16)
5943 .kr(1)
5944 .sr(1)
5945 .m(6)
5946 .n(16)
5947 .k(2)
5948 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005949 .Test(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard683f5592020-04-10 00:48:26 -07005950 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08005951#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchardbddfbcd2020-04-15 12:32:41 -07005952
5953
Frank Barcharde4d3f762021-12-23 15:31:43 -08005954#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchardbddfbcd2020-04-15 12:32:41 -07005955 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4) {
5956 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5957 GemmMicrokernelTester()
5958 .mr(1)
5959 .nr(8)
5960 .kr(1)
5961 .sr(1)
5962 .m(1)
5963 .n(8)
5964 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005965 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07005966 }
5967
5968 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, strided_cn) {
5969 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5970 GemmMicrokernelTester()
5971 .mr(1)
5972 .nr(8)
5973 .kr(1)
5974 .sr(1)
5975 .m(1)
5976 .n(8)
5977 .k(4)
5978 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005979 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07005980 }
5981
5982 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_strided_a) {
5983 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
5984 GemmMicrokernelTester()
5985 .mr(1)
5986 .nr(8)
5987 .kr(1)
5988 .sr(1)
5989 .m(1)
5990 .n(8)
5991 .k(4)
5992 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005993 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07005994 }
5995
5996 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile) {
5997 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005998 for (uint32_t n = 1; n <= 8; n++) {
5999 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006000 GemmMicrokernelTester()
6001 .mr(1)
6002 .nr(8)
6003 .kr(1)
6004 .sr(1)
6005 .m(m)
6006 .n(n)
6007 .k(4)
6008 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006009 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006010 }
6011 }
6012 }
6013
6014 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
6015 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6016 for (uint32_t m = 1; m <= 1; m++) {
6017 GemmMicrokernelTester()
6018 .mr(1)
6019 .nr(8)
6020 .kr(1)
6021 .sr(1)
6022 .m(m)
6023 .n(8)
6024 .k(4)
6025 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006026 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006027 }
6028 }
6029
6030 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
6031 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6032 for (uint32_t n = 1; n <= 8; n++) {
6033 GemmMicrokernelTester()
6034 .mr(1)
6035 .nr(8)
6036 .kr(1)
6037 .sr(1)
6038 .m(1)
6039 .n(n)
6040 .k(4)
6041 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006042 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006043 }
6044 }
6045
6046 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4) {
6047 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6048 for (size_t k = 1; k < 4; k++) {
6049 GemmMicrokernelTester()
6050 .mr(1)
6051 .nr(8)
6052 .kr(1)
6053 .sr(1)
6054 .m(1)
6055 .n(8)
6056 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006057 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006058 }
6059 }
6060
6061 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4_strided_a) {
6062 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6063 for (size_t k = 1; k < 4; k++) {
6064 GemmMicrokernelTester()
6065 .mr(1)
6066 .nr(8)
6067 .kr(1)
6068 .sr(1)
6069 .m(1)
6070 .n(8)
6071 .k(k)
6072 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006073 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006074 }
6075 }
6076
6077 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4_subtile) {
6078 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6079 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006080 for (uint32_t n = 1; n <= 8; n++) {
6081 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006082 GemmMicrokernelTester()
6083 .mr(1)
6084 .nr(8)
6085 .kr(1)
6086 .sr(1)
6087 .m(m)
6088 .n(n)
6089 .k(k)
6090 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006091 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006092 }
6093 }
6094 }
6095 }
6096
6097 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4) {
6098 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6099 for (size_t k = 5; k < 8; k++) {
6100 GemmMicrokernelTester()
6101 .mr(1)
6102 .nr(8)
6103 .kr(1)
6104 .sr(1)
6105 .m(1)
6106 .n(8)
6107 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006108 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006109 }
6110 }
6111
6112 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4_strided_a) {
6113 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6114 for (size_t k = 5; k < 8; k++) {
6115 GemmMicrokernelTester()
6116 .mr(1)
6117 .nr(8)
6118 .kr(1)
6119 .sr(1)
6120 .m(1)
6121 .n(8)
6122 .k(k)
6123 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006124 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006125 }
6126 }
6127
6128 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4_subtile) {
6129 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6130 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006131 for (uint32_t n = 1; n <= 8; n++) {
6132 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006133 GemmMicrokernelTester()
6134 .mr(1)
6135 .nr(8)
6136 .kr(1)
6137 .sr(1)
6138 .m(m)
6139 .n(n)
6140 .k(k)
6141 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006142 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006143 }
6144 }
6145 }
6146 }
6147
6148 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_div_4) {
6149 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6150 for (size_t k = 8; k <= 40; k += 4) {
6151 GemmMicrokernelTester()
6152 .mr(1)
6153 .nr(8)
6154 .kr(1)
6155 .sr(1)
6156 .m(1)
6157 .n(8)
6158 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006159 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006160 }
6161 }
6162
6163 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_div_4_strided_a) {
6164 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6165 for (size_t k = 8; k <= 40; k += 4) {
6166 GemmMicrokernelTester()
6167 .mr(1)
6168 .nr(8)
6169 .kr(1)
6170 .sr(1)
6171 .m(1)
6172 .n(8)
6173 .k(k)
6174 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006175 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006176 }
6177 }
6178
6179 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, k_div_4_subtile) {
6180 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6181 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006182 for (uint32_t n = 1; n <= 8; n++) {
6183 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006184 GemmMicrokernelTester()
6185 .mr(1)
6186 .nr(8)
6187 .kr(1)
6188 .sr(1)
6189 .m(m)
6190 .n(n)
6191 .k(k)
6192 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006193 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006194 }
6195 }
6196 }
6197 }
6198
6199 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8) {
6200 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6201 for (uint32_t n = 9; n < 16; n++) {
6202 for (size_t k = 1; k <= 20; k += 5) {
6203 GemmMicrokernelTester()
6204 .mr(1)
6205 .nr(8)
6206 .kr(1)
6207 .sr(1)
6208 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006209 .n(n)
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006210 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006211 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006212 }
6213 }
6214 }
6215
6216 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
6217 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6218 for (uint32_t n = 9; n < 16; n++) {
6219 for (size_t k = 1; k <= 20; k += 5) {
6220 GemmMicrokernelTester()
6221 .mr(1)
6222 .nr(8)
6223 .kr(1)
6224 .sr(1)
6225 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006226 .n(n)
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006227 .k(k)
6228 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006229 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006230 }
6231 }
6232 }
6233
6234 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_strided_a) {
6235 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6236 for (uint32_t n = 9; n < 16; n++) {
6237 for (size_t k = 1; k <= 20; k += 5) {
6238 GemmMicrokernelTester()
6239 .mr(1)
6240 .nr(8)
6241 .kr(1)
6242 .sr(1)
6243 .m(1)
6244 .n(n)
6245 .k(k)
6246 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006247 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006248 }
6249 }
6250 }
6251
6252 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_subtile) {
6253 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6254 for (uint32_t n = 9; n < 16; n++) {
6255 for (size_t k = 1; k <= 20; k += 5) {
6256 for (uint32_t m = 1; m <= 1; m++) {
6257 GemmMicrokernelTester()
6258 .mr(1)
6259 .nr(8)
6260 .kr(1)
6261 .sr(1)
6262 .m(m)
6263 .n(n)
6264 .k(k)
6265 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006266 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006267 }
6268 }
6269 }
6270 }
6271
6272 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, n_div_8) {
6273 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6274 for (uint32_t n = 16; n <= 24; n += 8) {
6275 for (size_t k = 1; k <= 20; k += 5) {
6276 GemmMicrokernelTester()
6277 .mr(1)
6278 .nr(8)
6279 .kr(1)
6280 .sr(1)
6281 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006282 .n(n)
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006283 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006284 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006285 }
6286 }
6287 }
6288
6289 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_strided_cn) {
6290 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6291 for (uint32_t n = 16; n <= 24; n += 8) {
6292 for (size_t k = 1; k <= 20; k += 5) {
6293 GemmMicrokernelTester()
6294 .mr(1)
6295 .nr(8)
6296 .kr(1)
6297 .sr(1)
6298 .m(1)
6299 .n(n)
6300 .k(k)
6301 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006302 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006303 }
6304 }
6305 }
6306
6307 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_strided_a) {
6308 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6309 for (uint32_t n = 16; n <= 24; n += 8) {
6310 for (size_t k = 1; k <= 20; k += 5) {
6311 GemmMicrokernelTester()
6312 .mr(1)
6313 .nr(8)
6314 .kr(1)
6315 .sr(1)
6316 .m(1)
6317 .n(n)
6318 .k(k)
6319 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006320 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006321 }
6322 }
6323 }
6324
6325 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_subtile) {
6326 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6327 for (uint32_t n = 16; n <= 24; n += 8) {
6328 for (size_t k = 1; k <= 20; k += 5) {
6329 for (uint32_t m = 1; m <= 1; m++) {
6330 GemmMicrokernelTester()
6331 .mr(1)
6332 .nr(8)
6333 .kr(1)
6334 .sr(1)
6335 .m(m)
6336 .n(n)
6337 .k(k)
6338 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006339 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006340 }
6341 }
6342 }
6343 }
6344
6345 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, strided_cm_subtile) {
6346 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6347 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006348 for (uint32_t n = 1; n <= 8; n++) {
6349 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006350 GemmMicrokernelTester()
6351 .mr(1)
6352 .nr(8)
6353 .kr(1)
6354 .sr(1)
6355 .m(m)
6356 .n(n)
6357 .k(k)
6358 .cm_stride(11)
6359 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006360 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006361 }
6362 }
6363 }
6364 }
6365
6366 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, qmin) {
6367 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6368 GemmMicrokernelTester()
6369 .mr(1)
6370 .nr(8)
6371 .kr(1)
6372 .sr(1)
6373 .m(1)
6374 .n(8)
6375 .k(4)
6376 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006377 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006378 }
6379
6380 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, qmax) {
6381 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6382 GemmMicrokernelTester()
6383 .mr(1)
6384 .nr(8)
6385 .kr(1)
6386 .sr(1)
6387 .m(1)
6388 .n(8)
6389 .k(4)
6390 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006391 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006392 }
6393
6394 TEST(F16_GEMM_MINMAX_1X8__AARCH64_NEONFP16ARITH_LD64, strided_cm) {
6395 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6396 GemmMicrokernelTester()
6397 .mr(1)
6398 .nr(8)
6399 .kr(1)
6400 .sr(1)
6401 .m(1)
6402 .n(8)
6403 .k(4)
6404 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006405 .Test(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006406 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08006407#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006408
6409
Frank Barcharde4d3f762021-12-23 15:31:43 -08006410#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006411 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4) {
6412 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6413 GemmMicrokernelTester()
6414 .mr(4)
6415 .nr(8)
6416 .kr(1)
6417 .sr(1)
6418 .m(4)
6419 .n(8)
6420 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006421 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006422 }
6423
6424 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, strided_cn) {
6425 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6426 GemmMicrokernelTester()
6427 .mr(4)
6428 .nr(8)
6429 .kr(1)
6430 .sr(1)
6431 .m(4)
6432 .n(8)
6433 .k(4)
6434 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006435 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006436 }
6437
6438 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_strided_a) {
6439 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6440 GemmMicrokernelTester()
6441 .mr(4)
6442 .nr(8)
6443 .kr(1)
6444 .sr(1)
6445 .m(4)
6446 .n(8)
6447 .k(4)
6448 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006449 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006450 }
6451
6452 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile) {
6453 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006454 for (uint32_t n = 1; n <= 8; n++) {
6455 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006456 GemmMicrokernelTester()
6457 .mr(4)
6458 .nr(8)
6459 .kr(1)
6460 .sr(1)
6461 .m(m)
6462 .n(n)
6463 .k(4)
6464 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006465 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006466 }
6467 }
6468 }
6469
6470 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
6471 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6472 for (uint32_t m = 1; m <= 4; m++) {
6473 GemmMicrokernelTester()
6474 .mr(4)
6475 .nr(8)
6476 .kr(1)
6477 .sr(1)
6478 .m(m)
6479 .n(8)
6480 .k(4)
6481 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006482 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006483 }
6484 }
6485
6486 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
6487 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6488 for (uint32_t n = 1; n <= 8; n++) {
6489 GemmMicrokernelTester()
6490 .mr(4)
6491 .nr(8)
6492 .kr(1)
6493 .sr(1)
6494 .m(4)
6495 .n(n)
6496 .k(4)
6497 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006498 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006499 }
6500 }
6501
6502 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4) {
6503 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6504 for (size_t k = 1; k < 4; k++) {
6505 GemmMicrokernelTester()
6506 .mr(4)
6507 .nr(8)
6508 .kr(1)
6509 .sr(1)
6510 .m(4)
6511 .n(8)
6512 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006513 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006514 }
6515 }
6516
6517 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4_strided_a) {
6518 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6519 for (size_t k = 1; k < 4; k++) {
6520 GemmMicrokernelTester()
6521 .mr(4)
6522 .nr(8)
6523 .kr(1)
6524 .sr(1)
6525 .m(4)
6526 .n(8)
6527 .k(k)
6528 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006529 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006530 }
6531 }
6532
6533 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4_subtile) {
6534 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6535 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006536 for (uint32_t n = 1; n <= 8; n++) {
6537 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006538 GemmMicrokernelTester()
6539 .mr(4)
6540 .nr(8)
6541 .kr(1)
6542 .sr(1)
6543 .m(m)
6544 .n(n)
6545 .k(k)
6546 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006547 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006548 }
6549 }
6550 }
6551 }
6552
6553 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4) {
6554 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6555 for (size_t k = 5; k < 8; k++) {
6556 GemmMicrokernelTester()
6557 .mr(4)
6558 .nr(8)
6559 .kr(1)
6560 .sr(1)
6561 .m(4)
6562 .n(8)
6563 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006564 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006565 }
6566 }
6567
6568 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4_strided_a) {
6569 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6570 for (size_t k = 5; k < 8; k++) {
6571 GemmMicrokernelTester()
6572 .mr(4)
6573 .nr(8)
6574 .kr(1)
6575 .sr(1)
6576 .m(4)
6577 .n(8)
6578 .k(k)
6579 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006580 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006581 }
6582 }
6583
6584 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4_subtile) {
6585 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6586 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006587 for (uint32_t n = 1; n <= 8; n++) {
6588 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006589 GemmMicrokernelTester()
6590 .mr(4)
6591 .nr(8)
6592 .kr(1)
6593 .sr(1)
6594 .m(m)
6595 .n(n)
6596 .k(k)
6597 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006598 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006599 }
6600 }
6601 }
6602 }
6603
6604 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_div_4) {
6605 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6606 for (size_t k = 8; k <= 40; k += 4) {
6607 GemmMicrokernelTester()
6608 .mr(4)
6609 .nr(8)
6610 .kr(1)
6611 .sr(1)
6612 .m(4)
6613 .n(8)
6614 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006615 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006616 }
6617 }
6618
6619 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_div_4_strided_a) {
6620 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6621 for (size_t k = 8; k <= 40; k += 4) {
6622 GemmMicrokernelTester()
6623 .mr(4)
6624 .nr(8)
6625 .kr(1)
6626 .sr(1)
6627 .m(4)
6628 .n(8)
6629 .k(k)
6630 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006631 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006632 }
6633 }
6634
6635 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, k_div_4_subtile) {
6636 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6637 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006638 for (uint32_t n = 1; n <= 8; n++) {
6639 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006640 GemmMicrokernelTester()
6641 .mr(4)
6642 .nr(8)
6643 .kr(1)
6644 .sr(1)
6645 .m(m)
6646 .n(n)
6647 .k(k)
6648 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006649 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006650 }
6651 }
6652 }
6653 }
6654
6655 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8) {
6656 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6657 for (uint32_t n = 9; n < 16; n++) {
6658 for (size_t k = 1; k <= 20; k += 5) {
6659 GemmMicrokernelTester()
6660 .mr(4)
6661 .nr(8)
6662 .kr(1)
6663 .sr(1)
6664 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006665 .n(n)
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006666 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006667 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006668 }
6669 }
6670 }
6671
6672 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
6673 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6674 for (uint32_t n = 9; n < 16; n++) {
6675 for (size_t k = 1; k <= 20; k += 5) {
6676 GemmMicrokernelTester()
6677 .mr(4)
6678 .nr(8)
6679 .kr(1)
6680 .sr(1)
6681 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006682 .n(n)
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006683 .k(k)
6684 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006685 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006686 }
6687 }
6688 }
6689
6690 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_strided_a) {
6691 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6692 for (uint32_t n = 9; n < 16; n++) {
6693 for (size_t k = 1; k <= 20; k += 5) {
6694 GemmMicrokernelTester()
6695 .mr(4)
6696 .nr(8)
6697 .kr(1)
6698 .sr(1)
6699 .m(4)
6700 .n(n)
6701 .k(k)
6702 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006703 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006704 }
6705 }
6706 }
6707
6708 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_subtile) {
6709 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6710 for (uint32_t n = 9; n < 16; n++) {
6711 for (size_t k = 1; k <= 20; k += 5) {
6712 for (uint32_t m = 1; m <= 4; m++) {
6713 GemmMicrokernelTester()
6714 .mr(4)
6715 .nr(8)
6716 .kr(1)
6717 .sr(1)
6718 .m(m)
6719 .n(n)
6720 .k(k)
6721 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006722 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006723 }
6724 }
6725 }
6726 }
6727
6728 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, n_div_8) {
6729 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6730 for (uint32_t n = 16; n <= 24; n += 8) {
6731 for (size_t k = 1; k <= 20; k += 5) {
6732 GemmMicrokernelTester()
6733 .mr(4)
6734 .nr(8)
6735 .kr(1)
6736 .sr(1)
6737 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006738 .n(n)
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006739 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006740 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006741 }
6742 }
6743 }
6744
6745 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_strided_cn) {
6746 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6747 for (uint32_t n = 16; n <= 24; n += 8) {
6748 for (size_t k = 1; k <= 20; k += 5) {
6749 GemmMicrokernelTester()
6750 .mr(4)
6751 .nr(8)
6752 .kr(1)
6753 .sr(1)
6754 .m(4)
6755 .n(n)
6756 .k(k)
6757 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006758 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006759 }
6760 }
6761 }
6762
6763 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_strided_a) {
6764 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6765 for (uint32_t n = 16; n <= 24; n += 8) {
6766 for (size_t k = 1; k <= 20; k += 5) {
6767 GemmMicrokernelTester()
6768 .mr(4)
6769 .nr(8)
6770 .kr(1)
6771 .sr(1)
6772 .m(4)
6773 .n(n)
6774 .k(k)
6775 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006776 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006777 }
6778 }
6779 }
6780
6781 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_subtile) {
6782 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6783 for (uint32_t n = 16; n <= 24; n += 8) {
6784 for (size_t k = 1; k <= 20; k += 5) {
6785 for (uint32_t m = 1; m <= 4; m++) {
6786 GemmMicrokernelTester()
6787 .mr(4)
6788 .nr(8)
6789 .kr(1)
6790 .sr(1)
6791 .m(m)
6792 .n(n)
6793 .k(k)
6794 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006795 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006796 }
6797 }
6798 }
6799 }
6800
6801 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, strided_cm_subtile) {
6802 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6803 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006804 for (uint32_t n = 1; n <= 8; n++) {
6805 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006806 GemmMicrokernelTester()
6807 .mr(4)
6808 .nr(8)
6809 .kr(1)
6810 .sr(1)
6811 .m(m)
6812 .n(n)
6813 .k(k)
6814 .cm_stride(11)
6815 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006816 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006817 }
6818 }
6819 }
6820 }
6821
6822 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, qmin) {
6823 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6824 GemmMicrokernelTester()
6825 .mr(4)
6826 .nr(8)
6827 .kr(1)
6828 .sr(1)
6829 .m(4)
6830 .n(8)
6831 .k(4)
6832 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006833 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006834 }
6835
6836 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, qmax) {
6837 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6838 GemmMicrokernelTester()
6839 .mr(4)
6840 .nr(8)
6841 .kr(1)
6842 .sr(1)
6843 .m(4)
6844 .n(8)
6845 .k(4)
6846 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006847 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006848 }
6849
6850 TEST(F16_GEMM_MINMAX_4X8__AARCH64_NEONFP16ARITH_LD64, strided_cm) {
6851 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6852 GemmMicrokernelTester()
6853 .mr(4)
6854 .nr(8)
6855 .kr(1)
6856 .sr(1)
6857 .m(4)
6858 .n(8)
6859 .k(4)
6860 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006861 .Test(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006862 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08006863#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006864
6865
Frank Barcharde4d3f762021-12-23 15:31:43 -08006866#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006867 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4) {
6868 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6869 GemmMicrokernelTester()
6870 .mr(6)
6871 .nr(8)
6872 .kr(1)
6873 .sr(1)
6874 .m(6)
6875 .n(8)
6876 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006877 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006878 }
6879
6880 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, strided_cn) {
6881 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6882 GemmMicrokernelTester()
6883 .mr(6)
6884 .nr(8)
6885 .kr(1)
6886 .sr(1)
6887 .m(6)
6888 .n(8)
6889 .k(4)
6890 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006891 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006892 }
6893
6894 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_strided_a) {
6895 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6896 GemmMicrokernelTester()
6897 .mr(6)
6898 .nr(8)
6899 .kr(1)
6900 .sr(1)
6901 .m(6)
6902 .n(8)
6903 .k(4)
6904 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006905 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006906 }
6907
6908 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile) {
6909 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006910 for (uint32_t n = 1; n <= 8; n++) {
6911 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006912 GemmMicrokernelTester()
6913 .mr(6)
6914 .nr(8)
6915 .kr(1)
6916 .sr(1)
6917 .m(m)
6918 .n(n)
6919 .k(4)
6920 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006921 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006922 }
6923 }
6924 }
6925
6926 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
6927 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6928 for (uint32_t m = 1; m <= 6; m++) {
6929 GemmMicrokernelTester()
6930 .mr(6)
6931 .nr(8)
6932 .kr(1)
6933 .sr(1)
6934 .m(m)
6935 .n(8)
6936 .k(4)
6937 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006938 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006939 }
6940 }
6941
6942 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
6943 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6944 for (uint32_t n = 1; n <= 8; n++) {
6945 GemmMicrokernelTester()
6946 .mr(6)
6947 .nr(8)
6948 .kr(1)
6949 .sr(1)
6950 .m(6)
6951 .n(n)
6952 .k(4)
6953 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006954 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006955 }
6956 }
6957
6958 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4) {
6959 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6960 for (size_t k = 1; k < 4; k++) {
6961 GemmMicrokernelTester()
6962 .mr(6)
6963 .nr(8)
6964 .kr(1)
6965 .sr(1)
6966 .m(6)
6967 .n(8)
6968 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006969 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006970 }
6971 }
6972
6973 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4_strided_a) {
6974 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6975 for (size_t k = 1; k < 4; k++) {
6976 GemmMicrokernelTester()
6977 .mr(6)
6978 .nr(8)
6979 .kr(1)
6980 .sr(1)
6981 .m(6)
6982 .n(8)
6983 .k(k)
6984 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006985 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006986 }
6987 }
6988
6989 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4_subtile) {
6990 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
6991 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006992 for (uint32_t n = 1; n <= 8; n++) {
6993 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07006994 GemmMicrokernelTester()
6995 .mr(6)
6996 .nr(8)
6997 .kr(1)
6998 .sr(1)
6999 .m(m)
7000 .n(n)
7001 .k(k)
7002 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007003 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007004 }
7005 }
7006 }
7007 }
7008
7009 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4) {
7010 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7011 for (size_t k = 5; k < 8; k++) {
7012 GemmMicrokernelTester()
7013 .mr(6)
7014 .nr(8)
7015 .kr(1)
7016 .sr(1)
7017 .m(6)
7018 .n(8)
7019 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007020 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007021 }
7022 }
7023
7024 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4_strided_a) {
7025 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7026 for (size_t k = 5; k < 8; k++) {
7027 GemmMicrokernelTester()
7028 .mr(6)
7029 .nr(8)
7030 .kr(1)
7031 .sr(1)
7032 .m(6)
7033 .n(8)
7034 .k(k)
7035 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007036 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007037 }
7038 }
7039
7040 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4_subtile) {
7041 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7042 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007043 for (uint32_t n = 1; n <= 8; n++) {
7044 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007045 GemmMicrokernelTester()
7046 .mr(6)
7047 .nr(8)
7048 .kr(1)
7049 .sr(1)
7050 .m(m)
7051 .n(n)
7052 .k(k)
7053 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007054 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007055 }
7056 }
7057 }
7058 }
7059
7060 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_div_4) {
7061 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7062 for (size_t k = 8; k <= 40; k += 4) {
7063 GemmMicrokernelTester()
7064 .mr(6)
7065 .nr(8)
7066 .kr(1)
7067 .sr(1)
7068 .m(6)
7069 .n(8)
7070 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007071 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007072 }
7073 }
7074
7075 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_div_4_strided_a) {
7076 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7077 for (size_t k = 8; k <= 40; k += 4) {
7078 GemmMicrokernelTester()
7079 .mr(6)
7080 .nr(8)
7081 .kr(1)
7082 .sr(1)
7083 .m(6)
7084 .n(8)
7085 .k(k)
7086 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007087 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007088 }
7089 }
7090
7091 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, k_div_4_subtile) {
7092 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7093 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007094 for (uint32_t n = 1; n <= 8; n++) {
7095 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007096 GemmMicrokernelTester()
7097 .mr(6)
7098 .nr(8)
7099 .kr(1)
7100 .sr(1)
7101 .m(m)
7102 .n(n)
7103 .k(k)
7104 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007105 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007106 }
7107 }
7108 }
7109 }
7110
7111 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8) {
7112 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7113 for (uint32_t n = 9; n < 16; n++) {
7114 for (size_t k = 1; k <= 20; k += 5) {
7115 GemmMicrokernelTester()
7116 .mr(6)
7117 .nr(8)
7118 .kr(1)
7119 .sr(1)
7120 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007121 .n(n)
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007122 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007123 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007124 }
7125 }
7126 }
7127
7128 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
7129 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7130 for (uint32_t n = 9; n < 16; n++) {
7131 for (size_t k = 1; k <= 20; k += 5) {
7132 GemmMicrokernelTester()
7133 .mr(6)
7134 .nr(8)
7135 .kr(1)
7136 .sr(1)
7137 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007138 .n(n)
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007139 .k(k)
7140 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007141 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007142 }
7143 }
7144 }
7145
7146 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_strided_a) {
7147 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7148 for (uint32_t n = 9; n < 16; n++) {
7149 for (size_t k = 1; k <= 20; k += 5) {
7150 GemmMicrokernelTester()
7151 .mr(6)
7152 .nr(8)
7153 .kr(1)
7154 .sr(1)
7155 .m(6)
7156 .n(n)
7157 .k(k)
7158 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007159 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007160 }
7161 }
7162 }
7163
7164 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_subtile) {
7165 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7166 for (uint32_t n = 9; n < 16; n++) {
7167 for (size_t k = 1; k <= 20; k += 5) {
7168 for (uint32_t m = 1; m <= 6; m++) {
7169 GemmMicrokernelTester()
7170 .mr(6)
7171 .nr(8)
7172 .kr(1)
7173 .sr(1)
7174 .m(m)
7175 .n(n)
7176 .k(k)
7177 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007178 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007179 }
7180 }
7181 }
7182 }
7183
7184 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, n_div_8) {
7185 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7186 for (uint32_t n = 16; n <= 24; n += 8) {
7187 for (size_t k = 1; k <= 20; k += 5) {
7188 GemmMicrokernelTester()
7189 .mr(6)
7190 .nr(8)
7191 .kr(1)
7192 .sr(1)
7193 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007194 .n(n)
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007195 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007196 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007197 }
7198 }
7199 }
7200
7201 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_strided_cn) {
7202 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7203 for (uint32_t n = 16; n <= 24; n += 8) {
7204 for (size_t k = 1; k <= 20; k += 5) {
7205 GemmMicrokernelTester()
7206 .mr(6)
7207 .nr(8)
7208 .kr(1)
7209 .sr(1)
7210 .m(6)
7211 .n(n)
7212 .k(k)
7213 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007214 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007215 }
7216 }
7217 }
7218
7219 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_strided_a) {
7220 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7221 for (uint32_t n = 16; n <= 24; n += 8) {
7222 for (size_t k = 1; k <= 20; k += 5) {
7223 GemmMicrokernelTester()
7224 .mr(6)
7225 .nr(8)
7226 .kr(1)
7227 .sr(1)
7228 .m(6)
7229 .n(n)
7230 .k(k)
7231 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007232 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007233 }
7234 }
7235 }
7236
7237 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_subtile) {
7238 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7239 for (uint32_t n = 16; n <= 24; n += 8) {
7240 for (size_t k = 1; k <= 20; k += 5) {
7241 for (uint32_t m = 1; m <= 6; m++) {
7242 GemmMicrokernelTester()
7243 .mr(6)
7244 .nr(8)
7245 .kr(1)
7246 .sr(1)
7247 .m(m)
7248 .n(n)
7249 .k(k)
7250 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007251 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007252 }
7253 }
7254 }
7255 }
7256
7257 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, strided_cm_subtile) {
7258 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7259 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007260 for (uint32_t n = 1; n <= 8; n++) {
7261 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007262 GemmMicrokernelTester()
7263 .mr(6)
7264 .nr(8)
7265 .kr(1)
7266 .sr(1)
7267 .m(m)
7268 .n(n)
7269 .k(k)
7270 .cm_stride(11)
7271 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007272 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007273 }
7274 }
7275 }
7276 }
7277
7278 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, qmin) {
7279 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7280 GemmMicrokernelTester()
7281 .mr(6)
7282 .nr(8)
7283 .kr(1)
7284 .sr(1)
7285 .m(6)
7286 .n(8)
7287 .k(4)
7288 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007289 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007290 }
7291
7292 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, qmax) {
7293 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7294 GemmMicrokernelTester()
7295 .mr(6)
7296 .nr(8)
7297 .kr(1)
7298 .sr(1)
7299 .m(6)
7300 .n(8)
7301 .k(4)
7302 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007303 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007304 }
7305
7306 TEST(F16_GEMM_MINMAX_6X8__AARCH64_NEONFP16ARITH_LD64, strided_cm) {
7307 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7308 GemmMicrokernelTester()
7309 .mr(6)
7310 .nr(8)
7311 .kr(1)
7312 .sr(1)
7313 .m(6)
7314 .n(8)
7315 .k(4)
7316 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007317 .Test(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardbddfbcd2020-04-15 12:32:41 -07007318 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08007319#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard3b8e5662020-04-20 12:12:53 -07007320
7321
Frank Barcharde4d3f762021-12-23 15:31:43 -08007322#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard3b8e5662020-04-20 12:12:53 -07007323 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4) {
7324 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7325 GemmMicrokernelTester()
7326 .mr(8)
7327 .nr(8)
7328 .kr(1)
7329 .sr(1)
7330 .m(8)
7331 .n(8)
7332 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007333 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007334 }
7335
7336 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, strided_cn) {
7337 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7338 GemmMicrokernelTester()
7339 .mr(8)
7340 .nr(8)
7341 .kr(1)
7342 .sr(1)
7343 .m(8)
7344 .n(8)
7345 .k(4)
7346 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007347 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007348 }
7349
7350 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_strided_a) {
7351 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7352 GemmMicrokernelTester()
7353 .mr(8)
7354 .nr(8)
7355 .kr(1)
7356 .sr(1)
7357 .m(8)
7358 .n(8)
7359 .k(4)
7360 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007361 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007362 }
7363
7364 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile) {
7365 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007366 for (uint32_t n = 1; n <= 8; n++) {
7367 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3b8e5662020-04-20 12:12:53 -07007368 GemmMicrokernelTester()
7369 .mr(8)
7370 .nr(8)
7371 .kr(1)
7372 .sr(1)
7373 .m(m)
7374 .n(n)
7375 .k(4)
7376 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007377 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007378 }
7379 }
7380 }
7381
7382 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
7383 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7384 for (uint32_t m = 1; m <= 8; m++) {
7385 GemmMicrokernelTester()
7386 .mr(8)
7387 .nr(8)
7388 .kr(1)
7389 .sr(1)
7390 .m(m)
7391 .n(8)
7392 .k(4)
7393 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007394 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007395 }
7396 }
7397
7398 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
7399 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7400 for (uint32_t n = 1; n <= 8; n++) {
7401 GemmMicrokernelTester()
7402 .mr(8)
7403 .nr(8)
7404 .kr(1)
7405 .sr(1)
7406 .m(8)
7407 .n(n)
7408 .k(4)
7409 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007410 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007411 }
7412 }
7413
7414 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4) {
7415 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7416 for (size_t k = 1; k < 4; k++) {
7417 GemmMicrokernelTester()
7418 .mr(8)
7419 .nr(8)
7420 .kr(1)
7421 .sr(1)
7422 .m(8)
7423 .n(8)
7424 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007425 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007426 }
7427 }
7428
7429 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4_strided_a) {
7430 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7431 for (size_t k = 1; k < 4; k++) {
7432 GemmMicrokernelTester()
7433 .mr(8)
7434 .nr(8)
7435 .kr(1)
7436 .sr(1)
7437 .m(8)
7438 .n(8)
7439 .k(k)
7440 .a_stride(7)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007441 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007442 }
7443 }
7444
7445 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_lt_4_subtile) {
7446 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7447 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007448 for (uint32_t n = 1; n <= 8; n++) {
7449 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3b8e5662020-04-20 12:12:53 -07007450 GemmMicrokernelTester()
7451 .mr(8)
7452 .nr(8)
7453 .kr(1)
7454 .sr(1)
7455 .m(m)
7456 .n(n)
7457 .k(k)
7458 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007459 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007460 }
7461 }
7462 }
7463 }
7464
7465 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4) {
7466 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7467 for (size_t k = 5; k < 8; k++) {
7468 GemmMicrokernelTester()
7469 .mr(8)
7470 .nr(8)
7471 .kr(1)
7472 .sr(1)
7473 .m(8)
7474 .n(8)
7475 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007476 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007477 }
7478 }
7479
7480 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4_strided_a) {
7481 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7482 for (size_t k = 5; k < 8; k++) {
7483 GemmMicrokernelTester()
7484 .mr(8)
7485 .nr(8)
7486 .kr(1)
7487 .sr(1)
7488 .m(8)
7489 .n(8)
7490 .k(k)
7491 .a_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007492 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007493 }
7494 }
7495
7496 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_gt_4_subtile) {
7497 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7498 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007499 for (uint32_t n = 1; n <= 8; n++) {
7500 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3b8e5662020-04-20 12:12:53 -07007501 GemmMicrokernelTester()
7502 .mr(8)
7503 .nr(8)
7504 .kr(1)
7505 .sr(1)
7506 .m(m)
7507 .n(n)
7508 .k(k)
7509 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007510 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007511 }
7512 }
7513 }
7514 }
7515
7516 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_div_4) {
7517 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7518 for (size_t k = 8; k <= 40; k += 4) {
7519 GemmMicrokernelTester()
7520 .mr(8)
7521 .nr(8)
7522 .kr(1)
7523 .sr(1)
7524 .m(8)
7525 .n(8)
7526 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007527 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007528 }
7529 }
7530
7531 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_div_4_strided_a) {
7532 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7533 for (size_t k = 8; k <= 40; k += 4) {
7534 GemmMicrokernelTester()
7535 .mr(8)
7536 .nr(8)
7537 .kr(1)
7538 .sr(1)
7539 .m(8)
7540 .n(8)
7541 .k(k)
7542 .a_stride(43)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007543 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007544 }
7545 }
7546
7547 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, k_div_4_subtile) {
7548 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7549 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007550 for (uint32_t n = 1; n <= 8; n++) {
7551 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3b8e5662020-04-20 12:12:53 -07007552 GemmMicrokernelTester()
7553 .mr(8)
7554 .nr(8)
7555 .kr(1)
7556 .sr(1)
7557 .m(m)
7558 .n(n)
7559 .k(k)
7560 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007561 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007562 }
7563 }
7564 }
7565 }
7566
7567 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8) {
7568 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7569 for (uint32_t n = 9; n < 16; n++) {
7570 for (size_t k = 1; k <= 20; k += 5) {
7571 GemmMicrokernelTester()
7572 .mr(8)
7573 .nr(8)
7574 .kr(1)
7575 .sr(1)
7576 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007577 .n(n)
Frank Barchard3b8e5662020-04-20 12:12:53 -07007578 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007579 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007580 }
7581 }
7582 }
7583
7584 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
7585 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7586 for (uint32_t n = 9; n < 16; n++) {
7587 for (size_t k = 1; k <= 20; k += 5) {
7588 GemmMicrokernelTester()
7589 .mr(8)
7590 .nr(8)
7591 .kr(1)
7592 .sr(1)
7593 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007594 .n(n)
Frank Barchard3b8e5662020-04-20 12:12:53 -07007595 .k(k)
7596 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007597 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007598 }
7599 }
7600 }
7601
// n_gt_8_strided_a: n in [9, 15], k in {1, 6, 11, 16}, m=8, with a_stride(23) set,
// which exceeds the largest k exercised here (16).
7602 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_strided_a) {
7603 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7604 for (uint32_t n = 9; n < 16; n++) {
7605 for (size_t k = 1; k <= 20; k += 5) {
7606 GemmMicrokernelTester()
7607 .mr(8)
7608 .nr(8)
7609 .kr(1)
7610 .sr(1)
7611 .m(8)
7612 .n(n)
7613 .k(k)
7614 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007615 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007616 }
7617 }
7618 }
7619
// n_gt_8_subtile: n in [9, 15], k in {1, 6, 11, 16}, and every m in [1, 8];
// each combination is run with iterations(1).
7620 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, n_gt_8_subtile) {
7621 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7622 for (uint32_t n = 9; n < 16; n++) {
7623 for (size_t k = 1; k <= 20; k += 5) {
7624 for (uint32_t m = 1; m <= 8; m++) {
7625 GemmMicrokernelTester()
7626 .mr(8)
7627 .nr(8)
7628 .kr(1)
7629 .sr(1)
7630 .m(m)
7631 .n(n)
7632 .k(k)
7633 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007634 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007635 }
7636 }
7637 }
7638 }
7639
// n_div_8: n in {16, 24} (multiples of nr=8 above a single tile), k in {1, 6, 11, 16}, m=8.
7640 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, n_div_8) {
7641 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7642 for (uint32_t n = 16; n <= 24; n += 8) {
7643 for (size_t k = 1; k <= 20; k += 5) {
7644 GemmMicrokernelTester()
7645 .mr(8)
7646 .nr(8)
7647 .kr(1)
7648 .sr(1)
7649 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007650 .n(n)
Frank Barchard3b8e5662020-04-20 12:12:53 -07007651 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007652 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007653 }
7654 }
7655 }
7656
// n_div_8_strided_cn: n in {16, 24}, k in {1, 6, 11, 16}, m=8, with cn_stride(11) set
// (larger than nr=8).
7657 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_strided_cn) {
7658 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7659 for (uint32_t n = 16; n <= 24; n += 8) {
7660 for (size_t k = 1; k <= 20; k += 5) {
7661 GemmMicrokernelTester()
7662 .mr(8)
7663 .nr(8)
7664 .kr(1)
7665 .sr(1)
7666 .m(8)
7667 .n(n)
7668 .k(k)
7669 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007670 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007671 }
7672 }
7673 }
7674
// n_div_8_strided_a: n in {16, 24}, k in {1, 6, 11, 16}, m=8, with a_stride(23) set
// (exceeds the largest k exercised, 16).
7675 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_strided_a) {
7676 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7677 for (uint32_t n = 16; n <= 24; n += 8) {
7678 for (size_t k = 1; k <= 20; k += 5) {
7679 GemmMicrokernelTester()
7680 .mr(8)
7681 .nr(8)
7682 .kr(1)
7683 .sr(1)
7684 .m(8)
7685 .n(n)
7686 .k(k)
7687 .a_stride(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007688 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007689 }
7690 }
7691 }
7692
// n_div_8_subtile: n in {16, 24}, k in {1, 6, 11, 16}, and every m in [1, 8];
// each combination is run with iterations(1).
7693 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, n_div_8_subtile) {
7694 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7695 for (uint32_t n = 16; n <= 24; n += 8) {
7696 for (size_t k = 1; k <= 20; k += 5) {
7697 for (uint32_t m = 1; m <= 8; m++) {
7698 GemmMicrokernelTester()
7699 .mr(8)
7700 .nr(8)
7701 .kr(1)
7702 .sr(1)
7703 .m(m)
7704 .n(n)
7705 .k(k)
7706 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007707 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007708 }
7709 }
7710 }
7711 }
7712
// strided_cm_subtile: k in {1, 6, 11, 16}, every n in [1, 8] and m in [1, 8], with
// cm_stride(11) set (larger than nr=8); each combination is run with iterations(1).
7713 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, strided_cm_subtile) {
7714 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7715 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007716 for (uint32_t n = 1; n <= 8; n++) {
7717 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3b8e5662020-04-20 12:12:53 -07007718 GemmMicrokernelTester()
7719 .mr(8)
7720 .nr(8)
7721 .kr(1)
7722 .sr(1)
7723 .m(m)
7724 .n(n)
7725 .k(k)
7726 .cm_stride(11)
7727 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007728 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007729 }
7730 }
7731 }
7732 }
7733
// qmin: single full 8x8 tile at k=4 with qmin(128) set on the tester.
// NOTE(review): qmin presumably controls the lower output clamp - confirm in gemm-microkernel-tester.h.
7734 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, qmin) {
7735 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7736 GemmMicrokernelTester()
7737 .mr(8)
7738 .nr(8)
7739 .kr(1)
7740 .sr(1)
7741 .m(8)
7742 .n(8)
7743 .k(4)
7744 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007745 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007746 }
7747
// qmax: single full 8x8 tile at k=4 with qmax(128) set on the tester.
// NOTE(review): qmax presumably controls the upper output clamp - confirm in gemm-microkernel-tester.h.
7748 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, qmax) {
7749 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7750 GemmMicrokernelTester()
7751 .mr(8)
7752 .nr(8)
7753 .kr(1)
7754 .sr(1)
7755 .m(8)
7756 .n(8)
7757 .k(4)
7758 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007759 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007760 }
7761
// strided_cm: single full 8x8 tile at k=4 with cm_stride(11) set (larger than nr=8).
7762 TEST(F16_GEMM_MINMAX_8X8__AARCH64_NEONFP16ARITH_LD64, strided_cm) {
7763 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
7764 GemmMicrokernelTester()
7765 .mr(8)
7766 .nr(8)
7767 .kr(1)
7768 .sr(1)
7769 .m(8)
7770 .n(8)
7771 .k(4)
7772 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007773 .Test(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3b8e5662020-04-20 12:12:53 -07007774 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08007775#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhanc4302c22022-01-06 19:27:03 -08007776
7777
7778#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_1: single full 1x8 tile (m=1, n=8) of the 1x8 avx2_broadcast kernel at k=1,
// with no stride overrides set on the tester.
7779 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, k_eq_1) {
7780 TEST_REQUIRES_X86_AVX2;
7781 GemmMicrokernelTester()
7782 .mr(1)
7783 .nr(8)
7784 .kr(1)
7785 .sr(1)
7786 .m(1)
7787 .n(8)
7788 .k(1)
7789 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7790 }
7791
// strided_cn: full 1x8 tile at k=1 with cn_stride(11) set (larger than nr=8).
7792 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, strided_cn) {
7793 TEST_REQUIRES_X86_AVX2;
7794 GemmMicrokernelTester()
7795 .mr(1)
7796 .nr(8)
7797 .kr(1)
7798 .sr(1)
7799 .m(1)
7800 .n(8)
7801 .k(1)
7802 .cn_stride(11)
7803 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7804 }
7805
// k_eq_1_strided_a: full 1x8 tile at k=1 with a_stride(3) set (larger than k).
7806 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, k_eq_1_strided_a) {
7807 TEST_REQUIRES_X86_AVX2;
7808 GemmMicrokernelTester()
7809 .mr(1)
7810 .nr(8)
7811 .kr(1)
7812 .sr(1)
7813 .m(1)
7814 .n(8)
7815 .k(1)
7816 .a_stride(3)
7817 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7818 }
7819
// k_eq_1_subtile: k=1 with every n in [1, 8]; the m loop covers only m=1 because its
// upper bound is 1. Each combination is run with iterations(1).
7820 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, k_eq_1_subtile) {
7821 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007822 for (uint32_t n = 1; n <= 8; n++) {
7823 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08007824 GemmMicrokernelTester()
7825 .mr(1)
7826 .nr(8)
7827 .kr(1)
7828 .sr(1)
7829 .m(m)
7830 .n(n)
7831 .k(1)
7832 .iterations(1)
7833 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7834 }
7835 }
7836 }
7837
// k_eq_1_subtile_m: k=1, n fixed at 8; the m loop covers only m=1 because its upper
// bound is 1. Run with iterations(1).
7838 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, k_eq_1_subtile_m) {
7839 TEST_REQUIRES_X86_AVX2;
7840 for (uint32_t m = 1; m <= 1; m++) {
7841 GemmMicrokernelTester()
7842 .mr(1)
7843 .nr(8)
7844 .kr(1)
7845 .sr(1)
7846 .m(m)
7847 .n(8)
7848 .k(1)
7849 .iterations(1)
7850 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7851 }
7852 }
7853
// k_eq_1_subtile_n: k=1, m fixed at 1, every n in [1, 8]; each run uses iterations(1).
7854 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, k_eq_1_subtile_n) {
7855 TEST_REQUIRES_X86_AVX2;
7856 for (uint32_t n = 1; n <= 8; n++) {
7857 GemmMicrokernelTester()
7858 .mr(1)
7859 .nr(8)
7860 .kr(1)
7861 .sr(1)
7862 .m(1)
7863 .n(n)
7864 .k(1)
7865 .iterations(1)
7866 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7867 }
7868 }
7869
// k_gt_1: full 1x8 tile for every k in [2, 9].
7870 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, k_gt_1) {
7871 TEST_REQUIRES_X86_AVX2;
7872 for (size_t k = 2; k < 10; k++) {
7873 GemmMicrokernelTester()
7874 .mr(1)
7875 .nr(8)
7876 .kr(1)
7877 .sr(1)
7878 .m(1)
7879 .n(8)
7880 .k(k)
7881 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7882 }
7883 }
7884
// k_gt_1_strided_a: full 1x8 tile for every k in [2, 9] with a_stride(11) set
// (exceeds the largest k exercised, 9).
7885 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, k_gt_1_strided_a) {
7886 TEST_REQUIRES_X86_AVX2;
7887 for (size_t k = 2; k < 10; k++) {
7888 GemmMicrokernelTester()
7889 .mr(1)
7890 .nr(8)
7891 .kr(1)
7892 .sr(1)
7893 .m(1)
7894 .n(8)
7895 .k(k)
7896 .a_stride(11)
7897 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7898 }
7899 }
7900
// k_gt_1_subtile: every k in [2, 9] and n in [1, 8]; the m loop covers only m=1.
// Each combination is run with iterations(1).
7901 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, k_gt_1_subtile) {
7902 TEST_REQUIRES_X86_AVX2;
7903 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007904 for (uint32_t n = 1; n <= 8; n++) {
7905 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08007906 GemmMicrokernelTester()
7907 .mr(1)
7908 .nr(8)
7909 .kr(1)
7910 .sr(1)
7911 .m(m)
7912 .n(n)
7913 .k(k)
7914 .iterations(1)
7915 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7916 }
7917 }
7918 }
7919 }
7920
// n_gt_8: m=1 with every n in [9, 15] (above nr=8) combined with k in {1, 3, 5}.
7921 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, n_gt_8) {
7922 TEST_REQUIRES_X86_AVX2;
7923 for (uint32_t n = 9; n < 16; n++) {
7924 for (size_t k = 1; k <= 5; k += 2) {
7925 GemmMicrokernelTester()
7926 .mr(1)
7927 .nr(8)
7928 .kr(1)
7929 .sr(1)
7930 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007931 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007932 .k(k)
7933 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7934 }
7935 }
7936 }
7937
// n_gt_8_strided_cn: n in [9, 15], k in {1, 3, 5}, m=1, with cn_stride(11) set
// (larger than nr=8).
7938 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, n_gt_8_strided_cn) {
7939 TEST_REQUIRES_X86_AVX2;
7940 for (uint32_t n = 9; n < 16; n++) {
7941 for (size_t k = 1; k <= 5; k += 2) {
7942 GemmMicrokernelTester()
7943 .mr(1)
7944 .nr(8)
7945 .kr(1)
7946 .sr(1)
7947 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007948 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007949 .k(k)
7950 .cn_stride(11)
7951 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7952 }
7953 }
7954 }
7955
// n_gt_8_strided_a: n in [9, 15], k in {1, 3, 5}, m=1, with a_stride(7) set
// (exceeds the largest k exercised, 5).
7956 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, n_gt_8_strided_a) {
7957 TEST_REQUIRES_X86_AVX2;
7958 for (uint32_t n = 9; n < 16; n++) {
7959 for (size_t k = 1; k <= 5; k += 2) {
7960 GemmMicrokernelTester()
7961 .mr(1)
7962 .nr(8)
7963 .kr(1)
7964 .sr(1)
7965 .m(1)
7966 .n(n)
7967 .k(k)
7968 .a_stride(7)
7969 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7970 }
7971 }
7972 }
7973
// n_gt_8_subtile: n in [9, 15], k in {1, 3, 5}; the m loop covers only m=1.
// Each combination is run with iterations(1).
7974 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, n_gt_8_subtile) {
7975 TEST_REQUIRES_X86_AVX2;
7976 for (uint32_t n = 9; n < 16; n++) {
7977 for (size_t k = 1; k <= 5; k += 2) {
7978 for (uint32_t m = 1; m <= 1; m++) {
7979 GemmMicrokernelTester()
7980 .mr(1)
7981 .nr(8)
7982 .kr(1)
7983 .sr(1)
7984 .m(m)
7985 .n(n)
7986 .k(k)
7987 .iterations(1)
7988 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7989 }
7990 }
7991 }
7992 }
7993
// n_div_8: n in {16, 24} (multiples of nr=8), k in {1, 3, 5}, m=1.
7994 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, n_div_8) {
7995 TEST_REQUIRES_X86_AVX2;
7996 for (uint32_t n = 16; n <= 24; n += 8) {
7997 for (size_t k = 1; k <= 5; k += 2) {
7998 GemmMicrokernelTester()
7999 .mr(1)
8000 .nr(8)
8001 .kr(1)
8002 .sr(1)
8003 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008004 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08008005 .k(k)
8006 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8007 }
8008 }
8009 }
8010
// n_div_8_strided_cn: n in {16, 24}, k in {1, 3, 5}, m=1, with cn_stride(11) set
// (larger than nr=8).
8011 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, n_div_8_strided_cn) {
8012 TEST_REQUIRES_X86_AVX2;
8013 for (uint32_t n = 16; n <= 24; n += 8) {
8014 for (size_t k = 1; k <= 5; k += 2) {
8015 GemmMicrokernelTester()
8016 .mr(1)
8017 .nr(8)
8018 .kr(1)
8019 .sr(1)
8020 .m(1)
8021 .n(n)
8022 .k(k)
8023 .cn_stride(11)
8024 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8025 }
8026 }
8027 }
8028
// n_div_8_strided_a: n in {16, 24}, k in {1, 3, 5}, m=1, with a_stride(7) set
// (exceeds the largest k exercised, 5).
8029 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, n_div_8_strided_a) {
8030 TEST_REQUIRES_X86_AVX2;
8031 for (uint32_t n = 16; n <= 24; n += 8) {
8032 for (size_t k = 1; k <= 5; k += 2) {
8033 GemmMicrokernelTester()
8034 .mr(1)
8035 .nr(8)
8036 .kr(1)
8037 .sr(1)
8038 .m(1)
8039 .n(n)
8040 .k(k)
8041 .a_stride(7)
8042 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8043 }
8044 }
8045 }
8046
// n_div_8_subtile: n in {16, 24}, k in {1, 3, 5}; the m loop covers only m=1.
// Each combination is run with iterations(1).
8047 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, n_div_8_subtile) {
8048 TEST_REQUIRES_X86_AVX2;
8049 for (uint32_t n = 16; n <= 24; n += 8) {
8050 for (size_t k = 1; k <= 5; k += 2) {
8051 for (uint32_t m = 1; m <= 1; m++) {
8052 GemmMicrokernelTester()
8053 .mr(1)
8054 .nr(8)
8055 .kr(1)
8056 .sr(1)
8057 .m(m)
8058 .n(n)
8059 .k(k)
8060 .iterations(1)
8061 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8062 }
8063 }
8064 }
8065 }
8066
// strided_cm_subtile: k in {1, 3, 5}, every n in [1, 8], m loop covering only m=1, with
// cm_stride(11) set (larger than nr=8); each combination is run with iterations(1).
8067 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, strided_cm_subtile) {
8068 TEST_REQUIRES_X86_AVX2;
8069 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008070 for (uint32_t n = 1; n <= 8; n++) {
8071 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08008072 GemmMicrokernelTester()
8073 .mr(1)
8074 .nr(8)
8075 .kr(1)
8076 .sr(1)
8077 .m(m)
8078 .n(n)
8079 .k(k)
8080 .cm_stride(11)
8081 .iterations(1)
8082 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8083 }
8084 }
8085 }
8086 }
8087
// qmin: single full 1x8 tile at k=1 with qmin(128) set on the tester.
// NOTE(review): qmin presumably controls the lower output clamp - confirm in gemm-microkernel-tester.h.
8088 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, qmin) {
8089 TEST_REQUIRES_X86_AVX2;
8090 GemmMicrokernelTester()
8091 .mr(1)
8092 .nr(8)
8093 .kr(1)
8094 .sr(1)
8095 .m(1)
8096 .n(8)
8097 .k(1)
8098 .qmin(128)
8099 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8100 }
8101
// qmax: single full 1x8 tile at k=1 with qmax(128) set on the tester.
// NOTE(review): qmax presumably controls the upper output clamp - confirm in gemm-microkernel-tester.h.
8102 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, qmax) {
8103 TEST_REQUIRES_X86_AVX2;
8104 GemmMicrokernelTester()
8105 .mr(1)
8106 .nr(8)
8107 .kr(1)
8108 .sr(1)
8109 .m(1)
8110 .n(8)
8111 .k(1)
8112 .qmax(128)
8113 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8114 }
8115
// strided_cm: single full 1x8 tile at k=1 with cm_stride(11) set (larger than nr=8).
8116 TEST(F16_GEMM_MINMAX_1X8__AVX2_BROADCAST, strided_cm) {
8117 TEST_REQUIRES_X86_AVX2;
8118 GemmMicrokernelTester()
8119 .mr(1)
8120 .nr(8)
8121 .kr(1)
8122 .sr(1)
8123 .m(1)
8124 .n(8)
8125 .k(1)
8126 .cm_stride(11)
8127 .Test(xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8128 }
8129#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8130
8131
8132#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_1: single full 4x8 tile (m=4, n=8) of the 4x8 avx2_broadcast kernel at k=1,
// with no stride overrides set on the tester.
8133 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, k_eq_1) {
8134 TEST_REQUIRES_X86_AVX2;
8135 GemmMicrokernelTester()
8136 .mr(4)
8137 .nr(8)
8138 .kr(1)
8139 .sr(1)
8140 .m(4)
8141 .n(8)
8142 .k(1)
8143 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8144 }
8145
// strided_cn: full 4x8 tile at k=1 with cn_stride(11) set (larger than nr=8).
8146 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, strided_cn) {
8147 TEST_REQUIRES_X86_AVX2;
8148 GemmMicrokernelTester()
8149 .mr(4)
8150 .nr(8)
8151 .kr(1)
8152 .sr(1)
8153 .m(4)
8154 .n(8)
8155 .k(1)
8156 .cn_stride(11)
8157 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8158 }
8159
// k_eq_1_strided_a: full 4x8 tile at k=1 with a_stride(3) set (larger than k).
8160 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, k_eq_1_strided_a) {
8161 TEST_REQUIRES_X86_AVX2;
8162 GemmMicrokernelTester()
8163 .mr(4)
8164 .nr(8)
8165 .kr(1)
8166 .sr(1)
8167 .m(4)
8168 .n(8)
8169 .k(1)
8170 .a_stride(3)
8171 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8172 }
8173
// k_eq_1_subtile: k=1 with every n in [1, 8] and m in [1, 4];
// each combination is run with iterations(1).
8174 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, k_eq_1_subtile) {
8175 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008176 for (uint32_t n = 1; n <= 8; n++) {
8177 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08008178 GemmMicrokernelTester()
8179 .mr(4)
8180 .nr(8)
8181 .kr(1)
8182 .sr(1)
8183 .m(m)
8184 .n(n)
8185 .k(1)
8186 .iterations(1)
8187 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8188 }
8189 }
8190 }
8191
// k_eq_1_subtile_m: k=1, n fixed at 8, every m in [1, 4]; each run uses iterations(1).
8192 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, k_eq_1_subtile_m) {
8193 TEST_REQUIRES_X86_AVX2;
8194 for (uint32_t m = 1; m <= 4; m++) {
8195 GemmMicrokernelTester()
8196 .mr(4)
8197 .nr(8)
8198 .kr(1)
8199 .sr(1)
8200 .m(m)
8201 .n(8)
8202 .k(1)
8203 .iterations(1)
8204 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8205 }
8206 }
8207
// k_eq_1_subtile_n: k=1, m fixed at 4, every n in [1, 8]; each run uses iterations(1).
8208 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, k_eq_1_subtile_n) {
8209 TEST_REQUIRES_X86_AVX2;
8210 for (uint32_t n = 1; n <= 8; n++) {
8211 GemmMicrokernelTester()
8212 .mr(4)
8213 .nr(8)
8214 .kr(1)
8215 .sr(1)
8216 .m(4)
8217 .n(n)
8218 .k(1)
8219 .iterations(1)
8220 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8221 }
8222 }
8223
// k_gt_1: full 4x8 tile for every k in [2, 9].
8224 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, k_gt_1) {
8225 TEST_REQUIRES_X86_AVX2;
8226 for (size_t k = 2; k < 10; k++) {
8227 GemmMicrokernelTester()
8228 .mr(4)
8229 .nr(8)
8230 .kr(1)
8231 .sr(1)
8232 .m(4)
8233 .n(8)
8234 .k(k)
8235 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8236 }
8237 }
8238
// k_gt_1_strided_a: full 4x8 tile for every k in [2, 9] with a_stride(11) set
// (exceeds the largest k exercised, 9).
8239 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, k_gt_1_strided_a) {
8240 TEST_REQUIRES_X86_AVX2;
8241 for (size_t k = 2; k < 10; k++) {
8242 GemmMicrokernelTester()
8243 .mr(4)
8244 .nr(8)
8245 .kr(1)
8246 .sr(1)
8247 .m(4)
8248 .n(8)
8249 .k(k)
8250 .a_stride(11)
8251 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8252 }
8253 }
8254
// k_gt_1_subtile: every k in [2, 9], n in [1, 8] and m in [1, 4];
// each combination is run with iterations(1).
8255 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, k_gt_1_subtile) {
8256 TEST_REQUIRES_X86_AVX2;
8257 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008258 for (uint32_t n = 1; n <= 8; n++) {
8259 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08008260 GemmMicrokernelTester()
8261 .mr(4)
8262 .nr(8)
8263 .kr(1)
8264 .sr(1)
8265 .m(m)
8266 .n(n)
8267 .k(k)
8268 .iterations(1)
8269 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8270 }
8271 }
8272 }
8273 }
8274
// n_gt_8: m=4 with every n in [9, 15] (above nr=8) combined with k in {1, 3, 5}.
8275 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, n_gt_8) {
8276 TEST_REQUIRES_X86_AVX2;
8277 for (uint32_t n = 9; n < 16; n++) {
8278 for (size_t k = 1; k <= 5; k += 2) {
8279 GemmMicrokernelTester()
8280 .mr(4)
8281 .nr(8)
8282 .kr(1)
8283 .sr(1)
8284 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008285 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08008286 .k(k)
8287 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8288 }
8289 }
8290 }
8291
// n_gt_8_strided_cn: n in [9, 15], k in {1, 3, 5}, m=4, with cn_stride(11) set
// (larger than nr=8).
8292 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, n_gt_8_strided_cn) {
8293 TEST_REQUIRES_X86_AVX2;
8294 for (uint32_t n = 9; n < 16; n++) {
8295 for (size_t k = 1; k <= 5; k += 2) {
8296 GemmMicrokernelTester()
8297 .mr(4)
8298 .nr(8)
8299 .kr(1)
8300 .sr(1)
8301 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008302 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08008303 .k(k)
8304 .cn_stride(11)
8305 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8306 }
8307 }
8308 }
8309
// n_gt_8_strided_a: n in [9, 15], k in {1, 3, 5}, m=4, with a_stride(7) set
// (exceeds the largest k exercised, 5).
8310 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, n_gt_8_strided_a) {
8311 TEST_REQUIRES_X86_AVX2;
8312 for (uint32_t n = 9; n < 16; n++) {
8313 for (size_t k = 1; k <= 5; k += 2) {
8314 GemmMicrokernelTester()
8315 .mr(4)
8316 .nr(8)
8317 .kr(1)
8318 .sr(1)
8319 .m(4)
8320 .n(n)
8321 .k(k)
8322 .a_stride(7)
8323 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8324 }
8325 }
8326 }
8327
// n_gt_8_subtile: n in [9, 15], k in {1, 3, 5}, and every m in [1, 4];
// each combination is run with iterations(1).
8328 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, n_gt_8_subtile) {
8329 TEST_REQUIRES_X86_AVX2;
8330 for (uint32_t n = 9; n < 16; n++) {
8331 for (size_t k = 1; k <= 5; k += 2) {
8332 for (uint32_t m = 1; m <= 4; m++) {
8333 GemmMicrokernelTester()
8334 .mr(4)
8335 .nr(8)
8336 .kr(1)
8337 .sr(1)
8338 .m(m)
8339 .n(n)
8340 .k(k)
8341 .iterations(1)
8342 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8343 }
8344 }
8345 }
8346 }
8347
// n_div_8: n in {16, 24} (multiples of nr=8), k in {1, 3, 5}, m=4.
8348 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, n_div_8) {
8349 TEST_REQUIRES_X86_AVX2;
8350 for (uint32_t n = 16; n <= 24; n += 8) {
8351 for (size_t k = 1; k <= 5; k += 2) {
8352 GemmMicrokernelTester()
8353 .mr(4)
8354 .nr(8)
8355 .kr(1)
8356 .sr(1)
8357 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008358 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08008359 .k(k)
8360 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8361 }
8362 }
8363 }
8364
// n_div_8_strided_cn: n in {16, 24}, k in {1, 3, 5}, m=4, with cn_stride(11) set
// (larger than nr=8).
8365 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, n_div_8_strided_cn) {
8366 TEST_REQUIRES_X86_AVX2;
8367 for (uint32_t n = 16; n <= 24; n += 8) {
8368 for (size_t k = 1; k <= 5; k += 2) {
8369 GemmMicrokernelTester()
8370 .mr(4)
8371 .nr(8)
8372 .kr(1)
8373 .sr(1)
8374 .m(4)
8375 .n(n)
8376 .k(k)
8377 .cn_stride(11)
8378 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8379 }
8380 }
8381 }
8382
// n_div_8_strided_a: n in {16, 24}, k in {1, 3, 5}, m=4, with a_stride(7) set
// (exceeds the largest k exercised, 5).
8383 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, n_div_8_strided_a) {
8384 TEST_REQUIRES_X86_AVX2;
8385 for (uint32_t n = 16; n <= 24; n += 8) {
8386 for (size_t k = 1; k <= 5; k += 2) {
8387 GemmMicrokernelTester()
8388 .mr(4)
8389 .nr(8)
8390 .kr(1)
8391 .sr(1)
8392 .m(4)
8393 .n(n)
8394 .k(k)
8395 .a_stride(7)
8396 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8397 }
8398 }
8399 }
8400
// n_div_8_subtile: n in {16, 24}, k in {1, 3, 5}, and every m in [1, 4];
// each combination is run with iterations(1).
8401 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, n_div_8_subtile) {
8402 TEST_REQUIRES_X86_AVX2;
8403 for (uint32_t n = 16; n <= 24; n += 8) {
8404 for (size_t k = 1; k <= 5; k += 2) {
8405 for (uint32_t m = 1; m <= 4; m++) {
8406 GemmMicrokernelTester()
8407 .mr(4)
8408 .nr(8)
8409 .kr(1)
8410 .sr(1)
8411 .m(m)
8412 .n(n)
8413 .k(k)
8414 .iterations(1)
8415 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8416 }
8417 }
8418 }
8419 }
8420
// strided_cm_subtile: k in {1, 3, 5}, every n in [1, 8] and m in [1, 4], with
// cm_stride(11) set (larger than nr=8); each combination is run with iterations(1).
8421 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, strided_cm_subtile) {
8422 TEST_REQUIRES_X86_AVX2;
8423 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008424 for (uint32_t n = 1; n <= 8; n++) {
8425 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08008426 GemmMicrokernelTester()
8427 .mr(4)
8428 .nr(8)
8429 .kr(1)
8430 .sr(1)
8431 .m(m)
8432 .n(n)
8433 .k(k)
8434 .cm_stride(11)
8435 .iterations(1)
8436 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8437 }
8438 }
8439 }
8440 }
8441
// qmin: single full 4x8 tile at k=1 with qmin(128) set on the tester.
// NOTE(review): qmin presumably controls the lower output clamp - confirm in gemm-microkernel-tester.h.
8442 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, qmin) {
8443 TEST_REQUIRES_X86_AVX2;
8444 GemmMicrokernelTester()
8445 .mr(4)
8446 .nr(8)
8447 .kr(1)
8448 .sr(1)
8449 .m(4)
8450 .n(8)
8451 .k(1)
8452 .qmin(128)
8453 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8454 }
8455
// qmax: single full 4x8 tile at k=1 with qmax(128) set on the tester.
// NOTE(review): qmax presumably controls the upper output clamp - confirm in gemm-microkernel-tester.h.
8456 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, qmax) {
8457 TEST_REQUIRES_X86_AVX2;
8458 GemmMicrokernelTester()
8459 .mr(4)
8460 .nr(8)
8461 .kr(1)
8462 .sr(1)
8463 .m(4)
8464 .n(8)
8465 .k(1)
8466 .qmax(128)
8467 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8468 }
8469
// strided_cm: single full 4x8 tile at k=1 with cm_stride(11) set (larger than nr=8).
8470 TEST(F16_GEMM_MINMAX_4X8__AVX2_BROADCAST, strided_cm) {
8471 TEST_REQUIRES_X86_AVX2;
8472 GemmMicrokernelTester()
8473 .mr(4)
8474 .nr(8)
8475 .kr(1)
8476 .sr(1)
8477 .m(4)
8478 .n(8)
8479 .k(1)
8480 .cm_stride(11)
8481 .Test(xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8482 }
8483#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8484
8485
8486#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_1: single full 5x8 tile (m=5, n=8) of the 5x8 avx2_broadcast kernel at k=1,
// with no stride overrides set on the tester.
8487 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, k_eq_1) {
8488 TEST_REQUIRES_X86_AVX2;
8489 GemmMicrokernelTester()
8490 .mr(5)
8491 .nr(8)
8492 .kr(1)
8493 .sr(1)
8494 .m(5)
8495 .n(8)
8496 .k(1)
8497 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8498 }
8499
// strided_cn: full 5x8 tile at k=1 with cn_stride(11) set (larger than nr=8).
8500 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, strided_cn) {
8501 TEST_REQUIRES_X86_AVX2;
8502 GemmMicrokernelTester()
8503 .mr(5)
8504 .nr(8)
8505 .kr(1)
8506 .sr(1)
8507 .m(5)
8508 .n(8)
8509 .k(1)
8510 .cn_stride(11)
8511 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8512 }
8513
// k_eq_1_strided_a: full 5x8 tile at k=1 with a_stride(3) set (larger than k).
8514 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, k_eq_1_strided_a) {
8515 TEST_REQUIRES_X86_AVX2;
8516 GemmMicrokernelTester()
8517 .mr(5)
8518 .nr(8)
8519 .kr(1)
8520 .sr(1)
8521 .m(5)
8522 .n(8)
8523 .k(1)
8524 .a_stride(3)
8525 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8526 }
8527
// k_eq_1_subtile: k=1 with every n in [1, 8] and m in [1, 5];
// each combination is run with iterations(1).
8528 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, k_eq_1_subtile) {
8529 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008530 for (uint32_t n = 1; n <= 8; n++) {
8531 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08008532 GemmMicrokernelTester()
8533 .mr(5)
8534 .nr(8)
8535 .kr(1)
8536 .sr(1)
8537 .m(m)
8538 .n(n)
8539 .k(1)
8540 .iterations(1)
8541 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8542 }
8543 }
8544 }
8545
// k_eq_1_subtile_m: k=1, n fixed at 8, every m in [1, 5]; each run uses iterations(1).
8546 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, k_eq_1_subtile_m) {
8547 TEST_REQUIRES_X86_AVX2;
8548 for (uint32_t m = 1; m <= 5; m++) {
8549 GemmMicrokernelTester()
8550 .mr(5)
8551 .nr(8)
8552 .kr(1)
8553 .sr(1)
8554 .m(m)
8555 .n(8)
8556 .k(1)
8557 .iterations(1)
8558 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8559 }
8560 }
8561
// k_eq_1_subtile_n: k=1, m fixed at 5, every n in [1, 8]; each run uses iterations(1).
8562 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, k_eq_1_subtile_n) {
8563 TEST_REQUIRES_X86_AVX2;
8564 for (uint32_t n = 1; n <= 8; n++) {
8565 GemmMicrokernelTester()
8566 .mr(5)
8567 .nr(8)
8568 .kr(1)
8569 .sr(1)
8570 .m(5)
8571 .n(n)
8572 .k(1)
8573 .iterations(1)
8574 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8575 }
8576 }
8577
// k_gt_1: full 5x8 tile for every k in [2, 9].
8578 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, k_gt_1) {
8579 TEST_REQUIRES_X86_AVX2;
8580 for (size_t k = 2; k < 10; k++) {
8581 GemmMicrokernelTester()
8582 .mr(5)
8583 .nr(8)
8584 .kr(1)
8585 .sr(1)
8586 .m(5)
8587 .n(8)
8588 .k(k)
8589 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8590 }
8591 }
8592
// k_gt_1_strided_a: full 5x8 tile for every k in [2, 9] with a_stride(11) set
// (exceeds the largest k exercised, 9).
8593 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, k_gt_1_strided_a) {
8594 TEST_REQUIRES_X86_AVX2;
8595 for (size_t k = 2; k < 10; k++) {
8596 GemmMicrokernelTester()
8597 .mr(5)
8598 .nr(8)
8599 .kr(1)
8600 .sr(1)
8601 .m(5)
8602 .n(8)
8603 .k(k)
8604 .a_stride(11)
8605 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8606 }
8607 }
8608
// k_gt_1_subtile: every k in [2, 9], n in [1, 8] and m in [1, 5];
// each combination is run with iterations(1).
8609 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, k_gt_1_subtile) {
8610 TEST_REQUIRES_X86_AVX2;
8611 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008612 for (uint32_t n = 1; n <= 8; n++) {
8613 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08008614 GemmMicrokernelTester()
8615 .mr(5)
8616 .nr(8)
8617 .kr(1)
8618 .sr(1)
8619 .m(m)
8620 .n(n)
8621 .k(k)
8622 .iterations(1)
8623 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8624 }
8625 }
8626 }
8627 }
8628
// n_gt_8: m=5 with every n in [9, 15] (above nr=8) combined with k in {1, 3, 5}.
8629 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, n_gt_8) {
8630 TEST_REQUIRES_X86_AVX2;
8631 for (uint32_t n = 9; n < 16; n++) {
8632 for (size_t k = 1; k <= 5; k += 2) {
8633 GemmMicrokernelTester()
8634 .mr(5)
8635 .nr(8)
8636 .kr(1)
8637 .sr(1)
8638 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008639 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08008640 .k(k)
8641 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8642 }
8643 }
8644 }
8645
// n_gt_8_strided_cn: n in [9, 15], k in {1, 3, 5}, m=5, with cn_stride(11) set
// (larger than nr=8).
8646 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, n_gt_8_strided_cn) {
8647 TEST_REQUIRES_X86_AVX2;
8648 for (uint32_t n = 9; n < 16; n++) {
8649 for (size_t k = 1; k <= 5; k += 2) {
8650 GemmMicrokernelTester()
8651 .mr(5)
8652 .nr(8)
8653 .kr(1)
8654 .sr(1)
8655 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008656 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08008657 .k(k)
8658 .cn_stride(11)
8659 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8660 }
8661 }
8662 }
8663
// n_gt_8_strided_a: n in [9, 15], k in {1, 3, 5}, m=5, with a_stride(7) set
// (exceeds the largest k exercised, 5).
8664 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, n_gt_8_strided_a) {
8665 TEST_REQUIRES_X86_AVX2;
8666 for (uint32_t n = 9; n < 16; n++) {
8667 for (size_t k = 1; k <= 5; k += 2) {
8668 GemmMicrokernelTester()
8669 .mr(5)
8670 .nr(8)
8671 .kr(1)
8672 .sr(1)
8673 .m(5)
8674 .n(n)
8675 .k(k)
8676 .a_stride(7)
8677 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8678 }
8679 }
8680 }
8681
// n_gt_8_subtile: n in [9, 15], k in {1, 3, 5}, and every m in [1, 5];
// each combination is run with iterations(1).
8682 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, n_gt_8_subtile) {
8683 TEST_REQUIRES_X86_AVX2;
8684 for (uint32_t n = 9; n < 16; n++) {
8685 for (size_t k = 1; k <= 5; k += 2) {
8686 for (uint32_t m = 1; m <= 5; m++) {
8687 GemmMicrokernelTester()
8688 .mr(5)
8689 .nr(8)
8690 .kr(1)
8691 .sr(1)
8692 .m(m)
8693 .n(n)
8694 .k(k)
8695 .iterations(1)
8696 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8697 }
8698 }
8699 }
8700 }
8701
// n_div_8: n in {16, 24} (multiples of nr=8), k in {1, 3, 5}, m=5.
8702 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, n_div_8) {
8703 TEST_REQUIRES_X86_AVX2;
8704 for (uint32_t n = 16; n <= 24; n += 8) {
8705 for (size_t k = 1; k <= 5; k += 2) {
8706 GemmMicrokernelTester()
8707 .mr(5)
8708 .nr(8)
8709 .kr(1)
8710 .sr(1)
8711 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008712 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08008713 .k(k)
8714 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8715 }
8716 }
8717 }
8718
// n_div_8_strided_cn: n in {16, 24}, k in {1, 3, 5}, m=5, with cn_stride(11) set
// (larger than nr=8).
8719 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, n_div_8_strided_cn) {
8720 TEST_REQUIRES_X86_AVX2;
8721 for (uint32_t n = 16; n <= 24; n += 8) {
8722 for (size_t k = 1; k <= 5; k += 2) {
8723 GemmMicrokernelTester()
8724 .mr(5)
8725 .nr(8)
8726 .kr(1)
8727 .sr(1)
8728 .m(5)
8729 .n(n)
8730 .k(k)
8731 .cn_stride(11)
8732 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8733 }
8734 }
8735 }
8736
// n_div_8_strided_a: n in {16, 24}, k in {1, 3, 5}, m=5, with a_stride(7) set
// (exceeds the largest k exercised, 5).
8737 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, n_div_8_strided_a) {
8738 TEST_REQUIRES_X86_AVX2;
8739 for (uint32_t n = 16; n <= 24; n += 8) {
8740 for (size_t k = 1; k <= 5; k += 2) {
8741 GemmMicrokernelTester()
8742 .mr(5)
8743 .nr(8)
8744 .kr(1)
8745 .sr(1)
8746 .m(5)
8747 .n(n)
8748 .k(k)
8749 .a_stride(7)
8750 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8751 }
8752 }
8753 }
8754
// n_div_8_subtile: n in {16, 24}, k in {1, 3, 5}, and every m in [1, 5];
// each combination is run with iterations(1).
8755 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, n_div_8_subtile) {
8756 TEST_REQUIRES_X86_AVX2;
8757 for (uint32_t n = 16; n <= 24; n += 8) {
8758 for (size_t k = 1; k <= 5; k += 2) {
8759 for (uint32_t m = 1; m <= 5; m++) {
8760 GemmMicrokernelTester()
8761 .mr(5)
8762 .nr(8)
8763 .kr(1)
8764 .sr(1)
8765 .m(m)
8766 .n(n)
8767 .k(k)
8768 .iterations(1)
8769 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8770 }
8771 }
8772 }
8773 }
8774
// strided_cm_subtile: k in {1, 3, 5}, every n in [1, 8] and m in [1, 5], with
// cm_stride(11) set (larger than nr=8); each combination is run with iterations(1).
8775 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, strided_cm_subtile) {
8776 TEST_REQUIRES_X86_AVX2;
8777 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008778 for (uint32_t n = 1; n <= 8; n++) {
8779 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08008780 GemmMicrokernelTester()
8781 .mr(5)
8782 .nr(8)
8783 .kr(1)
8784 .sr(1)
8785 .m(m)
8786 .n(n)
8787 .k(k)
8788 .cm_stride(11)
8789 .iterations(1)
8790 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8791 }
8792 }
8793 }
8794 }
8795
// qmin: single full 5x8 tile at k=1 with qmin(128) set on the tester.
// NOTE(review): qmin presumably controls the lower output clamp - confirm in gemm-microkernel-tester.h.
8796 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, qmin) {
8797 TEST_REQUIRES_X86_AVX2;
8798 GemmMicrokernelTester()
8799 .mr(5)
8800 .nr(8)
8801 .kr(1)
8802 .sr(1)
8803 .m(5)
8804 .n(8)
8805 .k(1)
8806 .qmin(128)
8807 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8808 }
8809
// qmax: single full 5x8 tile at k=1 with qmax(128) set on the tester.
// NOTE(review): qmax presumably controls the upper output clamp - confirm in gemm-microkernel-tester.h.
8810 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, qmax) {
8811 TEST_REQUIRES_X86_AVX2;
8812 GemmMicrokernelTester()
8813 .mr(5)
8814 .nr(8)
8815 .kr(1)
8816 .sr(1)
8817 .m(5)
8818 .n(8)
8819 .k(1)
8820 .qmax(128)
8821 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8822 }
8823
// strided_cm: single full 5x8 tile at k=1 with cm_stride(11) set (larger than nr=8).
8824 TEST(F16_GEMM_MINMAX_5X8__AVX2_BROADCAST, strided_cm) {
8825 TEST_REQUIRES_X86_AVX2;
8826 GemmMicrokernelTester()
8827 .mr(5)
8828 .nr(8)
8829 .kr(1)
8830 .sr(1)
8831 .m(5)
8832 .n(8)
8833 .k(1)
8834 .cm_stride(11)
8835 .Test(xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8836 }
8837#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8838
8839
8840#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8841 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, k_eq_1) {
8842 TEST_REQUIRES_X86_AVX2;
8843 GemmMicrokernelTester()
8844 .mr(6)
8845 .nr(8)
8846 .kr(1)
8847 .sr(1)
8848 .m(6)
8849 .n(8)
8850 .k(1)
8851 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8852 }
8853
8854 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, strided_cn) {
8855 TEST_REQUIRES_X86_AVX2;
8856 GemmMicrokernelTester()
8857 .mr(6)
8858 .nr(8)
8859 .kr(1)
8860 .sr(1)
8861 .m(6)
8862 .n(8)
8863 .k(1)
8864 .cn_stride(11)
8865 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8866 }
8867
8868 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, k_eq_1_strided_a) {
8869 TEST_REQUIRES_X86_AVX2;
8870 GemmMicrokernelTester()
8871 .mr(6)
8872 .nr(8)
8873 .kr(1)
8874 .sr(1)
8875 .m(6)
8876 .n(8)
8877 .k(1)
8878 .a_stride(3)
8879 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8880 }
8881
8882 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, k_eq_1_subtile) {
8883 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008884 for (uint32_t n = 1; n <= 8; n++) {
8885 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08008886 GemmMicrokernelTester()
8887 .mr(6)
8888 .nr(8)
8889 .kr(1)
8890 .sr(1)
8891 .m(m)
8892 .n(n)
8893 .k(1)
8894 .iterations(1)
8895 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8896 }
8897 }
8898 }
8899
8900 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, k_eq_1_subtile_m) {
8901 TEST_REQUIRES_X86_AVX2;
8902 for (uint32_t m = 1; m <= 6; m++) {
8903 GemmMicrokernelTester()
8904 .mr(6)
8905 .nr(8)
8906 .kr(1)
8907 .sr(1)
8908 .m(m)
8909 .n(8)
8910 .k(1)
8911 .iterations(1)
8912 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8913 }
8914 }
8915
8916 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, k_eq_1_subtile_n) {
8917 TEST_REQUIRES_X86_AVX2;
8918 for (uint32_t n = 1; n <= 8; n++) {
8919 GemmMicrokernelTester()
8920 .mr(6)
8921 .nr(8)
8922 .kr(1)
8923 .sr(1)
8924 .m(6)
8925 .n(n)
8926 .k(1)
8927 .iterations(1)
8928 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8929 }
8930 }
8931
8932 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, k_gt_1) {
8933 TEST_REQUIRES_X86_AVX2;
8934 for (size_t k = 2; k < 10; k++) {
8935 GemmMicrokernelTester()
8936 .mr(6)
8937 .nr(8)
8938 .kr(1)
8939 .sr(1)
8940 .m(6)
8941 .n(8)
8942 .k(k)
8943 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8944 }
8945 }
8946
8947 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, k_gt_1_strided_a) {
8948 TEST_REQUIRES_X86_AVX2;
8949 for (size_t k = 2; k < 10; k++) {
8950 GemmMicrokernelTester()
8951 .mr(6)
8952 .nr(8)
8953 .kr(1)
8954 .sr(1)
8955 .m(6)
8956 .n(8)
8957 .k(k)
8958 .a_stride(11)
8959 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8960 }
8961 }
8962
8963 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, k_gt_1_subtile) {
8964 TEST_REQUIRES_X86_AVX2;
8965 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008966 for (uint32_t n = 1; n <= 8; n++) {
8967 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08008968 GemmMicrokernelTester()
8969 .mr(6)
8970 .nr(8)
8971 .kr(1)
8972 .sr(1)
8973 .m(m)
8974 .n(n)
8975 .k(k)
8976 .iterations(1)
8977 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8978 }
8979 }
8980 }
8981 }
8982
8983 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, n_gt_8) {
8984 TEST_REQUIRES_X86_AVX2;
8985 for (uint32_t n = 9; n < 16; n++) {
8986 for (size_t k = 1; k <= 5; k += 2) {
8987 GemmMicrokernelTester()
8988 .mr(6)
8989 .nr(8)
8990 .kr(1)
8991 .sr(1)
8992 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008993 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08008994 .k(k)
8995 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
8996 }
8997 }
8998 }
8999
9000 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, n_gt_8_strided_cn) {
9001 TEST_REQUIRES_X86_AVX2;
9002 for (uint32_t n = 9; n < 16; n++) {
9003 for (size_t k = 1; k <= 5; k += 2) {
9004 GemmMicrokernelTester()
9005 .mr(6)
9006 .nr(8)
9007 .kr(1)
9008 .sr(1)
9009 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009010 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08009011 .k(k)
9012 .cn_stride(11)
9013 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9014 }
9015 }
9016 }
9017
9018 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, n_gt_8_strided_a) {
9019 TEST_REQUIRES_X86_AVX2;
9020 for (uint32_t n = 9; n < 16; n++) {
9021 for (size_t k = 1; k <= 5; k += 2) {
9022 GemmMicrokernelTester()
9023 .mr(6)
9024 .nr(8)
9025 .kr(1)
9026 .sr(1)
9027 .m(6)
9028 .n(n)
9029 .k(k)
9030 .a_stride(7)
9031 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9032 }
9033 }
9034 }
9035
9036 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, n_gt_8_subtile) {
9037 TEST_REQUIRES_X86_AVX2;
9038 for (uint32_t n = 9; n < 16; n++) {
9039 for (size_t k = 1; k <= 5; k += 2) {
9040 for (uint32_t m = 1; m <= 6; m++) {
9041 GemmMicrokernelTester()
9042 .mr(6)
9043 .nr(8)
9044 .kr(1)
9045 .sr(1)
9046 .m(m)
9047 .n(n)
9048 .k(k)
9049 .iterations(1)
9050 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9051 }
9052 }
9053 }
9054 }
9055
9056 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, n_div_8) {
9057 TEST_REQUIRES_X86_AVX2;
9058 for (uint32_t n = 16; n <= 24; n += 8) {
9059 for (size_t k = 1; k <= 5; k += 2) {
9060 GemmMicrokernelTester()
9061 .mr(6)
9062 .nr(8)
9063 .kr(1)
9064 .sr(1)
9065 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009066 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08009067 .k(k)
9068 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9069 }
9070 }
9071 }
9072
9073 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, n_div_8_strided_cn) {
9074 TEST_REQUIRES_X86_AVX2;
9075 for (uint32_t n = 16; n <= 24; n += 8) {
9076 for (size_t k = 1; k <= 5; k += 2) {
9077 GemmMicrokernelTester()
9078 .mr(6)
9079 .nr(8)
9080 .kr(1)
9081 .sr(1)
9082 .m(6)
9083 .n(n)
9084 .k(k)
9085 .cn_stride(11)
9086 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9087 }
9088 }
9089 }
9090
9091 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, n_div_8_strided_a) {
9092 TEST_REQUIRES_X86_AVX2;
9093 for (uint32_t n = 16; n <= 24; n += 8) {
9094 for (size_t k = 1; k <= 5; k += 2) {
9095 GemmMicrokernelTester()
9096 .mr(6)
9097 .nr(8)
9098 .kr(1)
9099 .sr(1)
9100 .m(6)
9101 .n(n)
9102 .k(k)
9103 .a_stride(7)
9104 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9105 }
9106 }
9107 }
9108
9109 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, n_div_8_subtile) {
9110 TEST_REQUIRES_X86_AVX2;
9111 for (uint32_t n = 16; n <= 24; n += 8) {
9112 for (size_t k = 1; k <= 5; k += 2) {
9113 for (uint32_t m = 1; m <= 6; m++) {
9114 GemmMicrokernelTester()
9115 .mr(6)
9116 .nr(8)
9117 .kr(1)
9118 .sr(1)
9119 .m(m)
9120 .n(n)
9121 .k(k)
9122 .iterations(1)
9123 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9124 }
9125 }
9126 }
9127 }
9128
9129 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, strided_cm_subtile) {
9130 TEST_REQUIRES_X86_AVX2;
9131 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009132 for (uint32_t n = 1; n <= 8; n++) {
9133 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08009134 GemmMicrokernelTester()
9135 .mr(6)
9136 .nr(8)
9137 .kr(1)
9138 .sr(1)
9139 .m(m)
9140 .n(n)
9141 .k(k)
9142 .cm_stride(11)
9143 .iterations(1)
9144 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9145 }
9146 }
9147 }
9148 }
9149
9150 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, qmin) {
9151 TEST_REQUIRES_X86_AVX2;
9152 GemmMicrokernelTester()
9153 .mr(6)
9154 .nr(8)
9155 .kr(1)
9156 .sr(1)
9157 .m(6)
9158 .n(8)
9159 .k(1)
9160 .qmin(128)
9161 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9162 }
9163
9164 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, qmax) {
9165 TEST_REQUIRES_X86_AVX2;
9166 GemmMicrokernelTester()
9167 .mr(6)
9168 .nr(8)
9169 .kr(1)
9170 .sr(1)
9171 .m(6)
9172 .n(8)
9173 .k(1)
9174 .qmax(128)
9175 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9176 }
9177
9178 TEST(F16_GEMM_MINMAX_6X8__AVX2_BROADCAST, strided_cm) {
9179 TEST_REQUIRES_X86_AVX2;
9180 GemmMicrokernelTester()
9181 .mr(6)
9182 .nr(8)
9183 .kr(1)
9184 .sr(1)
9185 .m(6)
9186 .n(8)
9187 .k(1)
9188 .cm_stride(11)
9189 .Test(xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9190 }
9191#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9192
9193
9194#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9195 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, k_eq_1) {
9196 TEST_REQUIRES_X86_AVX2;
9197 GemmMicrokernelTester()
9198 .mr(7)
9199 .nr(8)
9200 .kr(1)
9201 .sr(1)
9202 .m(7)
9203 .n(8)
9204 .k(1)
9205 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9206 }
9207
9208 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, strided_cn) {
9209 TEST_REQUIRES_X86_AVX2;
9210 GemmMicrokernelTester()
9211 .mr(7)
9212 .nr(8)
9213 .kr(1)
9214 .sr(1)
9215 .m(7)
9216 .n(8)
9217 .k(1)
9218 .cn_stride(11)
9219 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9220 }
9221
9222 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, k_eq_1_strided_a) {
9223 TEST_REQUIRES_X86_AVX2;
9224 GemmMicrokernelTester()
9225 .mr(7)
9226 .nr(8)
9227 .kr(1)
9228 .sr(1)
9229 .m(7)
9230 .n(8)
9231 .k(1)
9232 .a_stride(3)
9233 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9234 }
9235
9236 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, k_eq_1_subtile) {
9237 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009238 for (uint32_t n = 1; n <= 8; n++) {
9239 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08009240 GemmMicrokernelTester()
9241 .mr(7)
9242 .nr(8)
9243 .kr(1)
9244 .sr(1)
9245 .m(m)
9246 .n(n)
9247 .k(1)
9248 .iterations(1)
9249 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9250 }
9251 }
9252 }
9253
9254 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, k_eq_1_subtile_m) {
9255 TEST_REQUIRES_X86_AVX2;
9256 for (uint32_t m = 1; m <= 7; m++) {
9257 GemmMicrokernelTester()
9258 .mr(7)
9259 .nr(8)
9260 .kr(1)
9261 .sr(1)
9262 .m(m)
9263 .n(8)
9264 .k(1)
9265 .iterations(1)
9266 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9267 }
9268 }
9269
9270 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, k_eq_1_subtile_n) {
9271 TEST_REQUIRES_X86_AVX2;
9272 for (uint32_t n = 1; n <= 8; n++) {
9273 GemmMicrokernelTester()
9274 .mr(7)
9275 .nr(8)
9276 .kr(1)
9277 .sr(1)
9278 .m(7)
9279 .n(n)
9280 .k(1)
9281 .iterations(1)
9282 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9283 }
9284 }
9285
9286 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, k_gt_1) {
9287 TEST_REQUIRES_X86_AVX2;
9288 for (size_t k = 2; k < 10; k++) {
9289 GemmMicrokernelTester()
9290 .mr(7)
9291 .nr(8)
9292 .kr(1)
9293 .sr(1)
9294 .m(7)
9295 .n(8)
9296 .k(k)
9297 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9298 }
9299 }
9300
9301 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, k_gt_1_strided_a) {
9302 TEST_REQUIRES_X86_AVX2;
9303 for (size_t k = 2; k < 10; k++) {
9304 GemmMicrokernelTester()
9305 .mr(7)
9306 .nr(8)
9307 .kr(1)
9308 .sr(1)
9309 .m(7)
9310 .n(8)
9311 .k(k)
9312 .a_stride(11)
9313 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9314 }
9315 }
9316
9317 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, k_gt_1_subtile) {
9318 TEST_REQUIRES_X86_AVX2;
9319 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009320 for (uint32_t n = 1; n <= 8; n++) {
9321 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08009322 GemmMicrokernelTester()
9323 .mr(7)
9324 .nr(8)
9325 .kr(1)
9326 .sr(1)
9327 .m(m)
9328 .n(n)
9329 .k(k)
9330 .iterations(1)
9331 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9332 }
9333 }
9334 }
9335 }
9336
9337 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, n_gt_8) {
9338 TEST_REQUIRES_X86_AVX2;
9339 for (uint32_t n = 9; n < 16; n++) {
9340 for (size_t k = 1; k <= 5; k += 2) {
9341 GemmMicrokernelTester()
9342 .mr(7)
9343 .nr(8)
9344 .kr(1)
9345 .sr(1)
9346 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009347 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08009348 .k(k)
9349 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9350 }
9351 }
9352 }
9353
9354 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, n_gt_8_strided_cn) {
9355 TEST_REQUIRES_X86_AVX2;
9356 for (uint32_t n = 9; n < 16; n++) {
9357 for (size_t k = 1; k <= 5; k += 2) {
9358 GemmMicrokernelTester()
9359 .mr(7)
9360 .nr(8)
9361 .kr(1)
9362 .sr(1)
9363 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009364 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08009365 .k(k)
9366 .cn_stride(11)
9367 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9368 }
9369 }
9370 }
9371
9372 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, n_gt_8_strided_a) {
9373 TEST_REQUIRES_X86_AVX2;
9374 for (uint32_t n = 9; n < 16; n++) {
9375 for (size_t k = 1; k <= 5; k += 2) {
9376 GemmMicrokernelTester()
9377 .mr(7)
9378 .nr(8)
9379 .kr(1)
9380 .sr(1)
9381 .m(7)
9382 .n(n)
9383 .k(k)
9384 .a_stride(7)
9385 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9386 }
9387 }
9388 }
9389
9390 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, n_gt_8_subtile) {
9391 TEST_REQUIRES_X86_AVX2;
9392 for (uint32_t n = 9; n < 16; n++) {
9393 for (size_t k = 1; k <= 5; k += 2) {
9394 for (uint32_t m = 1; m <= 7; m++) {
9395 GemmMicrokernelTester()
9396 .mr(7)
9397 .nr(8)
9398 .kr(1)
9399 .sr(1)
9400 .m(m)
9401 .n(n)
9402 .k(k)
9403 .iterations(1)
9404 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9405 }
9406 }
9407 }
9408 }
9409
9410 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, n_div_8) {
9411 TEST_REQUIRES_X86_AVX2;
9412 for (uint32_t n = 16; n <= 24; n += 8) {
9413 for (size_t k = 1; k <= 5; k += 2) {
9414 GemmMicrokernelTester()
9415 .mr(7)
9416 .nr(8)
9417 .kr(1)
9418 .sr(1)
9419 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009420 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08009421 .k(k)
9422 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9423 }
9424 }
9425 }
9426
9427 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, n_div_8_strided_cn) {
9428 TEST_REQUIRES_X86_AVX2;
9429 for (uint32_t n = 16; n <= 24; n += 8) {
9430 for (size_t k = 1; k <= 5; k += 2) {
9431 GemmMicrokernelTester()
9432 .mr(7)
9433 .nr(8)
9434 .kr(1)
9435 .sr(1)
9436 .m(7)
9437 .n(n)
9438 .k(k)
9439 .cn_stride(11)
9440 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9441 }
9442 }
9443 }
9444
9445 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, n_div_8_strided_a) {
9446 TEST_REQUIRES_X86_AVX2;
9447 for (uint32_t n = 16; n <= 24; n += 8) {
9448 for (size_t k = 1; k <= 5; k += 2) {
9449 GemmMicrokernelTester()
9450 .mr(7)
9451 .nr(8)
9452 .kr(1)
9453 .sr(1)
9454 .m(7)
9455 .n(n)
9456 .k(k)
9457 .a_stride(7)
9458 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9459 }
9460 }
9461 }
9462
9463 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, n_div_8_subtile) {
9464 TEST_REQUIRES_X86_AVX2;
9465 for (uint32_t n = 16; n <= 24; n += 8) {
9466 for (size_t k = 1; k <= 5; k += 2) {
9467 for (uint32_t m = 1; m <= 7; m++) {
9468 GemmMicrokernelTester()
9469 .mr(7)
9470 .nr(8)
9471 .kr(1)
9472 .sr(1)
9473 .m(m)
9474 .n(n)
9475 .k(k)
9476 .iterations(1)
9477 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9478 }
9479 }
9480 }
9481 }
9482
9483 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, strided_cm_subtile) {
9484 TEST_REQUIRES_X86_AVX2;
9485 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009486 for (uint32_t n = 1; n <= 8; n++) {
9487 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08009488 GemmMicrokernelTester()
9489 .mr(7)
9490 .nr(8)
9491 .kr(1)
9492 .sr(1)
9493 .m(m)
9494 .n(n)
9495 .k(k)
9496 .cm_stride(11)
9497 .iterations(1)
9498 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9499 }
9500 }
9501 }
9502 }
9503
9504 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, qmin) {
9505 TEST_REQUIRES_X86_AVX2;
9506 GemmMicrokernelTester()
9507 .mr(7)
9508 .nr(8)
9509 .kr(1)
9510 .sr(1)
9511 .m(7)
9512 .n(8)
9513 .k(1)
9514 .qmin(128)
9515 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9516 }
9517
9518 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, qmax) {
9519 TEST_REQUIRES_X86_AVX2;
9520 GemmMicrokernelTester()
9521 .mr(7)
9522 .nr(8)
9523 .kr(1)
9524 .sr(1)
9525 .m(7)
9526 .n(8)
9527 .k(1)
9528 .qmax(128)
9529 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9530 }
9531
9532 TEST(F16_GEMM_MINMAX_7X8__AVX2_BROADCAST, strided_cm) {
9533 TEST_REQUIRES_X86_AVX2;
9534 GemmMicrokernelTester()
9535 .mr(7)
9536 .nr(8)
9537 .kr(1)
9538 .sr(1)
9539 .m(7)
9540 .n(8)
9541 .k(1)
9542 .cm_stride(11)
9543 .Test(xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9544 }
9545#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9546
9547
9548#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9549 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, k_eq_1) {
9550 TEST_REQUIRES_X86_AVX2;
9551 GemmMicrokernelTester()
9552 .mr(1)
9553 .nr(16)
9554 .kr(1)
9555 .sr(1)
9556 .m(1)
9557 .n(16)
9558 .k(1)
9559 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9560 }
9561
9562 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, strided_cn) {
9563 TEST_REQUIRES_X86_AVX2;
9564 GemmMicrokernelTester()
9565 .mr(1)
9566 .nr(16)
9567 .kr(1)
9568 .sr(1)
9569 .m(1)
9570 .n(16)
9571 .k(1)
9572 .cn_stride(19)
9573 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9574 }
9575
9576 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, k_eq_1_strided_a) {
9577 TEST_REQUIRES_X86_AVX2;
9578 GemmMicrokernelTester()
9579 .mr(1)
9580 .nr(16)
9581 .kr(1)
9582 .sr(1)
9583 .m(1)
9584 .n(16)
9585 .k(1)
9586 .a_stride(3)
9587 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9588 }
9589
9590 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, k_eq_1_subtile) {
9591 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009592 for (uint32_t n = 1; n <= 16; n++) {
9593 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08009594 GemmMicrokernelTester()
9595 .mr(1)
9596 .nr(16)
9597 .kr(1)
9598 .sr(1)
9599 .m(m)
9600 .n(n)
9601 .k(1)
9602 .iterations(1)
9603 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9604 }
9605 }
9606 }
9607
9608 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, k_eq_1_subtile_m) {
9609 TEST_REQUIRES_X86_AVX2;
9610 for (uint32_t m = 1; m <= 1; m++) {
9611 GemmMicrokernelTester()
9612 .mr(1)
9613 .nr(16)
9614 .kr(1)
9615 .sr(1)
9616 .m(m)
9617 .n(16)
9618 .k(1)
9619 .iterations(1)
9620 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9621 }
9622 }
9623
9624 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, k_eq_1_subtile_n) {
9625 TEST_REQUIRES_X86_AVX2;
9626 for (uint32_t n = 1; n <= 16; n++) {
9627 GemmMicrokernelTester()
9628 .mr(1)
9629 .nr(16)
9630 .kr(1)
9631 .sr(1)
9632 .m(1)
9633 .n(n)
9634 .k(1)
9635 .iterations(1)
9636 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9637 }
9638 }
9639
9640 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, k_gt_1) {
9641 TEST_REQUIRES_X86_AVX2;
9642 for (size_t k = 2; k < 10; k++) {
9643 GemmMicrokernelTester()
9644 .mr(1)
9645 .nr(16)
9646 .kr(1)
9647 .sr(1)
9648 .m(1)
9649 .n(16)
9650 .k(k)
9651 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9652 }
9653 }
9654
9655 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, k_gt_1_strided_a) {
9656 TEST_REQUIRES_X86_AVX2;
9657 for (size_t k = 2; k < 10; k++) {
9658 GemmMicrokernelTester()
9659 .mr(1)
9660 .nr(16)
9661 .kr(1)
9662 .sr(1)
9663 .m(1)
9664 .n(16)
9665 .k(k)
9666 .a_stride(11)
9667 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9668 }
9669 }
9670
9671 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, k_gt_1_subtile) {
9672 TEST_REQUIRES_X86_AVX2;
9673 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009674 for (uint32_t n = 1; n <= 16; n++) {
9675 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08009676 GemmMicrokernelTester()
9677 .mr(1)
9678 .nr(16)
9679 .kr(1)
9680 .sr(1)
9681 .m(m)
9682 .n(n)
9683 .k(k)
9684 .iterations(1)
9685 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9686 }
9687 }
9688 }
9689 }
9690
9691 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, n_gt_16) {
9692 TEST_REQUIRES_X86_AVX2;
9693 for (uint32_t n = 17; n < 32; n++) {
9694 for (size_t k = 1; k <= 5; k += 2) {
9695 GemmMicrokernelTester()
9696 .mr(1)
9697 .nr(16)
9698 .kr(1)
9699 .sr(1)
9700 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009701 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08009702 .k(k)
9703 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9704 }
9705 }
9706 }
9707
9708 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, n_gt_16_strided_cn) {
9709 TEST_REQUIRES_X86_AVX2;
9710 for (uint32_t n = 17; n < 32; n++) {
9711 for (size_t k = 1; k <= 5; k += 2) {
9712 GemmMicrokernelTester()
9713 .mr(1)
9714 .nr(16)
9715 .kr(1)
9716 .sr(1)
9717 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009718 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08009719 .k(k)
9720 .cn_stride(19)
9721 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9722 }
9723 }
9724 }
9725
9726 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, n_gt_16_strided_a) {
9727 TEST_REQUIRES_X86_AVX2;
9728 for (uint32_t n = 17; n < 32; n++) {
9729 for (size_t k = 1; k <= 5; k += 2) {
9730 GemmMicrokernelTester()
9731 .mr(1)
9732 .nr(16)
9733 .kr(1)
9734 .sr(1)
9735 .m(1)
9736 .n(n)
9737 .k(k)
9738 .a_stride(7)
9739 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9740 }
9741 }
9742 }
9743
9744 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, n_gt_16_subtile) {
9745 TEST_REQUIRES_X86_AVX2;
9746 for (uint32_t n = 17; n < 32; n++) {
9747 for (size_t k = 1; k <= 5; k += 2) {
9748 for (uint32_t m = 1; m <= 1; m++) {
9749 GemmMicrokernelTester()
9750 .mr(1)
9751 .nr(16)
9752 .kr(1)
9753 .sr(1)
9754 .m(m)
9755 .n(n)
9756 .k(k)
9757 .iterations(1)
9758 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9759 }
9760 }
9761 }
9762 }
9763
9764 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, n_div_16) {
9765 TEST_REQUIRES_X86_AVX2;
9766 for (uint32_t n = 32; n <= 48; n += 16) {
9767 for (size_t k = 1; k <= 5; k += 2) {
9768 GemmMicrokernelTester()
9769 .mr(1)
9770 .nr(16)
9771 .kr(1)
9772 .sr(1)
9773 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009774 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08009775 .k(k)
9776 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9777 }
9778 }
9779 }
9780
9781 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, n_div_16_strided_cn) {
9782 TEST_REQUIRES_X86_AVX2;
9783 for (uint32_t n = 32; n <= 48; n += 16) {
9784 for (size_t k = 1; k <= 5; k += 2) {
9785 GemmMicrokernelTester()
9786 .mr(1)
9787 .nr(16)
9788 .kr(1)
9789 .sr(1)
9790 .m(1)
9791 .n(n)
9792 .k(k)
9793 .cn_stride(19)
9794 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9795 }
9796 }
9797 }
9798
9799 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, n_div_16_strided_a) {
9800 TEST_REQUIRES_X86_AVX2;
9801 for (uint32_t n = 32; n <= 48; n += 16) {
9802 for (size_t k = 1; k <= 5; k += 2) {
9803 GemmMicrokernelTester()
9804 .mr(1)
9805 .nr(16)
9806 .kr(1)
9807 .sr(1)
9808 .m(1)
9809 .n(n)
9810 .k(k)
9811 .a_stride(7)
9812 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9813 }
9814 }
9815 }
9816
9817 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, n_div_16_subtile) {
9818 TEST_REQUIRES_X86_AVX2;
9819 for (uint32_t n = 32; n <= 48; n += 16) {
9820 for (size_t k = 1; k <= 5; k += 2) {
9821 for (uint32_t m = 1; m <= 1; m++) {
9822 GemmMicrokernelTester()
9823 .mr(1)
9824 .nr(16)
9825 .kr(1)
9826 .sr(1)
9827 .m(m)
9828 .n(n)
9829 .k(k)
9830 .iterations(1)
9831 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9832 }
9833 }
9834 }
9835 }
9836
9837 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, strided_cm_subtile) {
9838 TEST_REQUIRES_X86_AVX2;
9839 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009840 for (uint32_t n = 1; n <= 16; n++) {
9841 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08009842 GemmMicrokernelTester()
9843 .mr(1)
9844 .nr(16)
9845 .kr(1)
9846 .sr(1)
9847 .m(m)
9848 .n(n)
9849 .k(k)
9850 .cm_stride(19)
9851 .iterations(1)
9852 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9853 }
9854 }
9855 }
9856 }
9857
9858 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, qmin) {
9859 TEST_REQUIRES_X86_AVX2;
9860 GemmMicrokernelTester()
9861 .mr(1)
9862 .nr(16)
9863 .kr(1)
9864 .sr(1)
9865 .m(1)
9866 .n(16)
9867 .k(1)
9868 .qmin(128)
9869 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9870 }
9871
9872 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, qmax) {
9873 TEST_REQUIRES_X86_AVX2;
9874 GemmMicrokernelTester()
9875 .mr(1)
9876 .nr(16)
9877 .kr(1)
9878 .sr(1)
9879 .m(1)
9880 .n(16)
9881 .k(1)
9882 .qmax(128)
9883 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9884 }
9885
9886 TEST(F16_GEMM_MINMAX_1X16__AVX2_BROADCAST, strided_cm) {
9887 TEST_REQUIRES_X86_AVX2;
9888 GemmMicrokernelTester()
9889 .mr(1)
9890 .nr(16)
9891 .kr(1)
9892 .sr(1)
9893 .m(1)
9894 .n(16)
9895 .k(1)
9896 .cm_stride(19)
9897 .Test(xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9898 }
9899#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9900
9901
9902#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9903 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, k_eq_1) {
9904 TEST_REQUIRES_X86_AVX2;
9905 GemmMicrokernelTester()
9906 .mr(3)
9907 .nr(16)
9908 .kr(1)
9909 .sr(1)
9910 .m(3)
9911 .n(16)
9912 .k(1)
9913 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9914 }
9915
9916 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, strided_cn) {
9917 TEST_REQUIRES_X86_AVX2;
9918 GemmMicrokernelTester()
9919 .mr(3)
9920 .nr(16)
9921 .kr(1)
9922 .sr(1)
9923 .m(3)
9924 .n(16)
9925 .k(1)
9926 .cn_stride(19)
9927 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9928 }
9929
9930 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, k_eq_1_strided_a) {
9931 TEST_REQUIRES_X86_AVX2;
9932 GemmMicrokernelTester()
9933 .mr(3)
9934 .nr(16)
9935 .kr(1)
9936 .sr(1)
9937 .m(3)
9938 .n(16)
9939 .k(1)
9940 .a_stride(3)
9941 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9942 }
9943
9944 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, k_eq_1_subtile) {
9945 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009946 for (uint32_t n = 1; n <= 16; n++) {
9947 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08009948 GemmMicrokernelTester()
9949 .mr(3)
9950 .nr(16)
9951 .kr(1)
9952 .sr(1)
9953 .m(m)
9954 .n(n)
9955 .k(1)
9956 .iterations(1)
9957 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9958 }
9959 }
9960 }
9961
9962 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, k_eq_1_subtile_m) {
9963 TEST_REQUIRES_X86_AVX2;
9964 for (uint32_t m = 1; m <= 3; m++) {
9965 GemmMicrokernelTester()
9966 .mr(3)
9967 .nr(16)
9968 .kr(1)
9969 .sr(1)
9970 .m(m)
9971 .n(16)
9972 .k(1)
9973 .iterations(1)
9974 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9975 }
9976 }
9977
9978 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, k_eq_1_subtile_n) {
9979 TEST_REQUIRES_X86_AVX2;
9980 for (uint32_t n = 1; n <= 16; n++) {
9981 GemmMicrokernelTester()
9982 .mr(3)
9983 .nr(16)
9984 .kr(1)
9985 .sr(1)
9986 .m(3)
9987 .n(n)
9988 .k(1)
9989 .iterations(1)
9990 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
9991 }
9992 }
9993
9994 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, k_gt_1) {
9995 TEST_REQUIRES_X86_AVX2;
9996 for (size_t k = 2; k < 10; k++) {
9997 GemmMicrokernelTester()
9998 .mr(3)
9999 .nr(16)
10000 .kr(1)
10001 .sr(1)
10002 .m(3)
10003 .n(16)
10004 .k(k)
10005 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10006 }
10007 }
10008
10009 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, k_gt_1_strided_a) {
10010 TEST_REQUIRES_X86_AVX2;
10011 for (size_t k = 2; k < 10; k++) {
10012 GemmMicrokernelTester()
10013 .mr(3)
10014 .nr(16)
10015 .kr(1)
10016 .sr(1)
10017 .m(3)
10018 .n(16)
10019 .k(k)
10020 .a_stride(11)
10021 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10022 }
10023 }
10024
10025 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, k_gt_1_subtile) {
10026 TEST_REQUIRES_X86_AVX2;
10027 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010028 for (uint32_t n = 1; n <= 16; n++) {
10029 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -080010030 GemmMicrokernelTester()
10031 .mr(3)
10032 .nr(16)
10033 .kr(1)
10034 .sr(1)
10035 .m(m)
10036 .n(n)
10037 .k(k)
10038 .iterations(1)
10039 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10040 }
10041 }
10042 }
10043 }
10044
10045 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, n_gt_16) {
10046 TEST_REQUIRES_X86_AVX2;
10047 for (uint32_t n = 17; n < 32; n++) {
10048 for (size_t k = 1; k <= 5; k += 2) {
10049 GemmMicrokernelTester()
10050 .mr(3)
10051 .nr(16)
10052 .kr(1)
10053 .sr(1)
10054 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010055 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -080010056 .k(k)
10057 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10058 }
10059 }
10060 }
10061
10062 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, n_gt_16_strided_cn) {
10063 TEST_REQUIRES_X86_AVX2;
10064 for (uint32_t n = 17; n < 32; n++) {
10065 for (size_t k = 1; k <= 5; k += 2) {
10066 GemmMicrokernelTester()
10067 .mr(3)
10068 .nr(16)
10069 .kr(1)
10070 .sr(1)
10071 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010072 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -080010073 .k(k)
10074 .cn_stride(19)
10075 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10076 }
10077 }
10078 }
10079
10080 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, n_gt_16_strided_a) {
10081 TEST_REQUIRES_X86_AVX2;
10082 for (uint32_t n = 17; n < 32; n++) {
10083 for (size_t k = 1; k <= 5; k += 2) {
10084 GemmMicrokernelTester()
10085 .mr(3)
10086 .nr(16)
10087 .kr(1)
10088 .sr(1)
10089 .m(3)
10090 .n(n)
10091 .k(k)
10092 .a_stride(7)
10093 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10094 }
10095 }
10096 }
10097
10098 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, n_gt_16_subtile) {
10099 TEST_REQUIRES_X86_AVX2;
10100 for (uint32_t n = 17; n < 32; n++) {
10101 for (size_t k = 1; k <= 5; k += 2) {
10102 for (uint32_t m = 1; m <= 3; m++) {
10103 GemmMicrokernelTester()
10104 .mr(3)
10105 .nr(16)
10106 .kr(1)
10107 .sr(1)
10108 .m(m)
10109 .n(n)
10110 .k(k)
10111 .iterations(1)
10112 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10113 }
10114 }
10115 }
10116 }
10117
10118 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, n_div_16) {
10119 TEST_REQUIRES_X86_AVX2;
10120 for (uint32_t n = 32; n <= 48; n += 16) {
10121 for (size_t k = 1; k <= 5; k += 2) {
10122 GemmMicrokernelTester()
10123 .mr(3)
10124 .nr(16)
10125 .kr(1)
10126 .sr(1)
10127 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010128 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -080010129 .k(k)
10130 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10131 }
10132 }
10133 }
10134
10135 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, n_div_16_strided_cn) {
10136 TEST_REQUIRES_X86_AVX2;
10137 for (uint32_t n = 32; n <= 48; n += 16) {
10138 for (size_t k = 1; k <= 5; k += 2) {
10139 GemmMicrokernelTester()
10140 .mr(3)
10141 .nr(16)
10142 .kr(1)
10143 .sr(1)
10144 .m(3)
10145 .n(n)
10146 .k(k)
10147 .cn_stride(19)
10148 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10149 }
10150 }
10151 }
10152
10153 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, n_div_16_strided_a) {
10154 TEST_REQUIRES_X86_AVX2;
10155 for (uint32_t n = 32; n <= 48; n += 16) {
10156 for (size_t k = 1; k <= 5; k += 2) {
10157 GemmMicrokernelTester()
10158 .mr(3)
10159 .nr(16)
10160 .kr(1)
10161 .sr(1)
10162 .m(3)
10163 .n(n)
10164 .k(k)
10165 .a_stride(7)
10166 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10167 }
10168 }
10169 }
10170
10171 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, n_div_16_subtile) {
10172 TEST_REQUIRES_X86_AVX2;
10173 for (uint32_t n = 32; n <= 48; n += 16) {
10174 for (size_t k = 1; k <= 5; k += 2) {
10175 for (uint32_t m = 1; m <= 3; m++) {
10176 GemmMicrokernelTester()
10177 .mr(3)
10178 .nr(16)
10179 .kr(1)
10180 .sr(1)
10181 .m(m)
10182 .n(n)
10183 .k(k)
10184 .iterations(1)
10185 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10186 }
10187 }
10188 }
10189 }
10190
10191 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, strided_cm_subtile) {
10192 TEST_REQUIRES_X86_AVX2;
10193 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010194 for (uint32_t n = 1; n <= 16; n++) {
10195 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -080010196 GemmMicrokernelTester()
10197 .mr(3)
10198 .nr(16)
10199 .kr(1)
10200 .sr(1)
10201 .m(m)
10202 .n(n)
10203 .k(k)
10204 .cm_stride(19)
10205 .iterations(1)
10206 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10207 }
10208 }
10209 }
10210 }
10211
10212 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, qmin) {
10213 TEST_REQUIRES_X86_AVX2;
10214 GemmMicrokernelTester()
10215 .mr(3)
10216 .nr(16)
10217 .kr(1)
10218 .sr(1)
10219 .m(3)
10220 .n(16)
10221 .k(1)
10222 .qmin(128)
10223 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10224 }
10225
10226 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, qmax) {
10227 TEST_REQUIRES_X86_AVX2;
10228 GemmMicrokernelTester()
10229 .mr(3)
10230 .nr(16)
10231 .kr(1)
10232 .sr(1)
10233 .m(3)
10234 .n(16)
10235 .k(1)
10236 .qmax(128)
10237 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10238 }
10239
10240 TEST(F16_GEMM_MINMAX_3X16__AVX2_BROADCAST, strided_cm) {
10241 TEST_REQUIRES_X86_AVX2;
10242 GemmMicrokernelTester()
10243 .mr(3)
10244 .nr(16)
10245 .kr(1)
10246 .sr(1)
10247 .m(3)
10248 .n(16)
10249 .k(1)
10250 .cm_stride(19)
10251 .Test(xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10252 }
10253#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10254
10255
10256#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10257 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, k_eq_1) {
10258 TEST_REQUIRES_X86_AVX2;
10259 GemmMicrokernelTester()
10260 .mr(4)
10261 .nr(16)
10262 .kr(1)
10263 .sr(1)
10264 .m(4)
10265 .n(16)
10266 .k(1)
10267 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10268 }
10269
10270 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, strided_cn) {
10271 TEST_REQUIRES_X86_AVX2;
10272 GemmMicrokernelTester()
10273 .mr(4)
10274 .nr(16)
10275 .kr(1)
10276 .sr(1)
10277 .m(4)
10278 .n(16)
10279 .k(1)
10280 .cn_stride(19)
10281 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10282 }
10283
10284 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, k_eq_1_strided_a) {
10285 TEST_REQUIRES_X86_AVX2;
10286 GemmMicrokernelTester()
10287 .mr(4)
10288 .nr(16)
10289 .kr(1)
10290 .sr(1)
10291 .m(4)
10292 .n(16)
10293 .k(1)
10294 .a_stride(3)
10295 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10296 }
10297
10298 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, k_eq_1_subtile) {
10299 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010300 for (uint32_t n = 1; n <= 16; n++) {
10301 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -080010302 GemmMicrokernelTester()
10303 .mr(4)
10304 .nr(16)
10305 .kr(1)
10306 .sr(1)
10307 .m(m)
10308 .n(n)
10309 .k(1)
10310 .iterations(1)
10311 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10312 }
10313 }
10314 }
10315
10316 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, k_eq_1_subtile_m) {
10317 TEST_REQUIRES_X86_AVX2;
10318 for (uint32_t m = 1; m <= 4; m++) {
10319 GemmMicrokernelTester()
10320 .mr(4)
10321 .nr(16)
10322 .kr(1)
10323 .sr(1)
10324 .m(m)
10325 .n(16)
10326 .k(1)
10327 .iterations(1)
10328 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10329 }
10330 }
10331
10332 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, k_eq_1_subtile_n) {
10333 TEST_REQUIRES_X86_AVX2;
10334 for (uint32_t n = 1; n <= 16; n++) {
10335 GemmMicrokernelTester()
10336 .mr(4)
10337 .nr(16)
10338 .kr(1)
10339 .sr(1)
10340 .m(4)
10341 .n(n)
10342 .k(1)
10343 .iterations(1)
10344 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10345 }
10346 }
10347
10348 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, k_gt_1) {
10349 TEST_REQUIRES_X86_AVX2;
10350 for (size_t k = 2; k < 10; k++) {
10351 GemmMicrokernelTester()
10352 .mr(4)
10353 .nr(16)
10354 .kr(1)
10355 .sr(1)
10356 .m(4)
10357 .n(16)
10358 .k(k)
10359 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10360 }
10361 }
10362
10363 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, k_gt_1_strided_a) {
10364 TEST_REQUIRES_X86_AVX2;
10365 for (size_t k = 2; k < 10; k++) {
10366 GemmMicrokernelTester()
10367 .mr(4)
10368 .nr(16)
10369 .kr(1)
10370 .sr(1)
10371 .m(4)
10372 .n(16)
10373 .k(k)
10374 .a_stride(11)
10375 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10376 }
10377 }
10378
10379 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, k_gt_1_subtile) {
10380 TEST_REQUIRES_X86_AVX2;
10381 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010382 for (uint32_t n = 1; n <= 16; n++) {
10383 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -080010384 GemmMicrokernelTester()
10385 .mr(4)
10386 .nr(16)
10387 .kr(1)
10388 .sr(1)
10389 .m(m)
10390 .n(n)
10391 .k(k)
10392 .iterations(1)
10393 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10394 }
10395 }
10396 }
10397 }
10398
10399 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, n_gt_16) {
10400 TEST_REQUIRES_X86_AVX2;
10401 for (uint32_t n = 17; n < 32; n++) {
10402 for (size_t k = 1; k <= 5; k += 2) {
10403 GemmMicrokernelTester()
10404 .mr(4)
10405 .nr(16)
10406 .kr(1)
10407 .sr(1)
10408 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010409 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -080010410 .k(k)
10411 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10412 }
10413 }
10414 }
10415
10416 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, n_gt_16_strided_cn) {
10417 TEST_REQUIRES_X86_AVX2;
10418 for (uint32_t n = 17; n < 32; n++) {
10419 for (size_t k = 1; k <= 5; k += 2) {
10420 GemmMicrokernelTester()
10421 .mr(4)
10422 .nr(16)
10423 .kr(1)
10424 .sr(1)
10425 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010426 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -080010427 .k(k)
10428 .cn_stride(19)
10429 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10430 }
10431 }
10432 }
10433
10434 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, n_gt_16_strided_a) {
10435 TEST_REQUIRES_X86_AVX2;
10436 for (uint32_t n = 17; n < 32; n++) {
10437 for (size_t k = 1; k <= 5; k += 2) {
10438 GemmMicrokernelTester()
10439 .mr(4)
10440 .nr(16)
10441 .kr(1)
10442 .sr(1)
10443 .m(4)
10444 .n(n)
10445 .k(k)
10446 .a_stride(7)
10447 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10448 }
10449 }
10450 }
10451
10452 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, n_gt_16_subtile) {
10453 TEST_REQUIRES_X86_AVX2;
10454 for (uint32_t n = 17; n < 32; n++) {
10455 for (size_t k = 1; k <= 5; k += 2) {
10456 for (uint32_t m = 1; m <= 4; m++) {
10457 GemmMicrokernelTester()
10458 .mr(4)
10459 .nr(16)
10460 .kr(1)
10461 .sr(1)
10462 .m(m)
10463 .n(n)
10464 .k(k)
10465 .iterations(1)
10466 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10467 }
10468 }
10469 }
10470 }
10471
10472 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, n_div_16) {
10473 TEST_REQUIRES_X86_AVX2;
10474 for (uint32_t n = 32; n <= 48; n += 16) {
10475 for (size_t k = 1; k <= 5; k += 2) {
10476 GemmMicrokernelTester()
10477 .mr(4)
10478 .nr(16)
10479 .kr(1)
10480 .sr(1)
10481 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010482 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -080010483 .k(k)
10484 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10485 }
10486 }
10487 }
10488
10489 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, n_div_16_strided_cn) {
10490 TEST_REQUIRES_X86_AVX2;
10491 for (uint32_t n = 32; n <= 48; n += 16) {
10492 for (size_t k = 1; k <= 5; k += 2) {
10493 GemmMicrokernelTester()
10494 .mr(4)
10495 .nr(16)
10496 .kr(1)
10497 .sr(1)
10498 .m(4)
10499 .n(n)
10500 .k(k)
10501 .cn_stride(19)
10502 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10503 }
10504 }
10505 }
10506
10507 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, n_div_16_strided_a) {
10508 TEST_REQUIRES_X86_AVX2;
10509 for (uint32_t n = 32; n <= 48; n += 16) {
10510 for (size_t k = 1; k <= 5; k += 2) {
10511 GemmMicrokernelTester()
10512 .mr(4)
10513 .nr(16)
10514 .kr(1)
10515 .sr(1)
10516 .m(4)
10517 .n(n)
10518 .k(k)
10519 .a_stride(7)
10520 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10521 }
10522 }
10523 }
10524
10525 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, n_div_16_subtile) {
10526 TEST_REQUIRES_X86_AVX2;
10527 for (uint32_t n = 32; n <= 48; n += 16) {
10528 for (size_t k = 1; k <= 5; k += 2) {
10529 for (uint32_t m = 1; m <= 4; m++) {
10530 GemmMicrokernelTester()
10531 .mr(4)
10532 .nr(16)
10533 .kr(1)
10534 .sr(1)
10535 .m(m)
10536 .n(n)
10537 .k(k)
10538 .iterations(1)
10539 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10540 }
10541 }
10542 }
10543 }
10544
10545 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, strided_cm_subtile) {
10546 TEST_REQUIRES_X86_AVX2;
10547 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010548 for (uint32_t n = 1; n <= 16; n++) {
10549 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -080010550 GemmMicrokernelTester()
10551 .mr(4)
10552 .nr(16)
10553 .kr(1)
10554 .sr(1)
10555 .m(m)
10556 .n(n)
10557 .k(k)
10558 .cm_stride(19)
10559 .iterations(1)
10560 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10561 }
10562 }
10563 }
10564 }
10565
10566 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, qmin) {
10567 TEST_REQUIRES_X86_AVX2;
10568 GemmMicrokernelTester()
10569 .mr(4)
10570 .nr(16)
10571 .kr(1)
10572 .sr(1)
10573 .m(4)
10574 .n(16)
10575 .k(1)
10576 .qmin(128)
10577 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10578 }
10579
10580 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, qmax) {
10581 TEST_REQUIRES_X86_AVX2;
10582 GemmMicrokernelTester()
10583 .mr(4)
10584 .nr(16)
10585 .kr(1)
10586 .sr(1)
10587 .m(4)
10588 .n(16)
10589 .k(1)
10590 .qmax(128)
10591 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10592 }
10593
10594 TEST(F16_GEMM_MINMAX_4X16__AVX2_BROADCAST, strided_cm) {
10595 TEST_REQUIRES_X86_AVX2;
10596 GemmMicrokernelTester()
10597 .mr(4)
10598 .nr(16)
10599 .kr(1)
10600 .sr(1)
10601 .m(4)
10602 .n(16)
10603 .k(1)
10604 .cm_stride(19)
10605 .Test(xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10606 }
10607#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10608
10609
10610#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10611 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, k_eq_1) {
10612 TEST_REQUIRES_X86_AVX2;
10613 GemmMicrokernelTester()
10614 .mr(5)
10615 .nr(16)
10616 .kr(1)
10617 .sr(1)
10618 .m(5)
10619 .n(16)
10620 .k(1)
10621 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10622 }
10623
10624 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, strided_cn) {
10625 TEST_REQUIRES_X86_AVX2;
10626 GemmMicrokernelTester()
10627 .mr(5)
10628 .nr(16)
10629 .kr(1)
10630 .sr(1)
10631 .m(5)
10632 .n(16)
10633 .k(1)
10634 .cn_stride(19)
10635 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10636 }
10637
10638 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, k_eq_1_strided_a) {
10639 TEST_REQUIRES_X86_AVX2;
10640 GemmMicrokernelTester()
10641 .mr(5)
10642 .nr(16)
10643 .kr(1)
10644 .sr(1)
10645 .m(5)
10646 .n(16)
10647 .k(1)
10648 .a_stride(3)
10649 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10650 }
10651
10652 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, k_eq_1_subtile) {
10653 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010654 for (uint32_t n = 1; n <= 16; n++) {
10655 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -080010656 GemmMicrokernelTester()
10657 .mr(5)
10658 .nr(16)
10659 .kr(1)
10660 .sr(1)
10661 .m(m)
10662 .n(n)
10663 .k(1)
10664 .iterations(1)
10665 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10666 }
10667 }
10668 }
10669
10670 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, k_eq_1_subtile_m) {
10671 TEST_REQUIRES_X86_AVX2;
10672 for (uint32_t m = 1; m <= 5; m++) {
10673 GemmMicrokernelTester()
10674 .mr(5)
10675 .nr(16)
10676 .kr(1)
10677 .sr(1)
10678 .m(m)
10679 .n(16)
10680 .k(1)
10681 .iterations(1)
10682 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10683 }
10684 }
10685
10686 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, k_eq_1_subtile_n) {
10687 TEST_REQUIRES_X86_AVX2;
10688 for (uint32_t n = 1; n <= 16; n++) {
10689 GemmMicrokernelTester()
10690 .mr(5)
10691 .nr(16)
10692 .kr(1)
10693 .sr(1)
10694 .m(5)
10695 .n(n)
10696 .k(1)
10697 .iterations(1)
10698 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10699 }
10700 }
10701
10702 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, k_gt_1) {
10703 TEST_REQUIRES_X86_AVX2;
10704 for (size_t k = 2; k < 10; k++) {
10705 GemmMicrokernelTester()
10706 .mr(5)
10707 .nr(16)
10708 .kr(1)
10709 .sr(1)
10710 .m(5)
10711 .n(16)
10712 .k(k)
10713 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10714 }
10715 }
10716
10717 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, k_gt_1_strided_a) {
10718 TEST_REQUIRES_X86_AVX2;
10719 for (size_t k = 2; k < 10; k++) {
10720 GemmMicrokernelTester()
10721 .mr(5)
10722 .nr(16)
10723 .kr(1)
10724 .sr(1)
10725 .m(5)
10726 .n(16)
10727 .k(k)
10728 .a_stride(11)
10729 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10730 }
10731 }
10732
10733 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, k_gt_1_subtile) {
10734 TEST_REQUIRES_X86_AVX2;
10735 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010736 for (uint32_t n = 1; n <= 16; n++) {
10737 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -080010738 GemmMicrokernelTester()
10739 .mr(5)
10740 .nr(16)
10741 .kr(1)
10742 .sr(1)
10743 .m(m)
10744 .n(n)
10745 .k(k)
10746 .iterations(1)
10747 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10748 }
10749 }
10750 }
10751 }
10752
10753 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, n_gt_16) {
10754 TEST_REQUIRES_X86_AVX2;
10755 for (uint32_t n = 17; n < 32; n++) {
10756 for (size_t k = 1; k <= 5; k += 2) {
10757 GemmMicrokernelTester()
10758 .mr(5)
10759 .nr(16)
10760 .kr(1)
10761 .sr(1)
10762 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010763 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -080010764 .k(k)
10765 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10766 }
10767 }
10768 }
10769
10770 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, n_gt_16_strided_cn) {
10771 TEST_REQUIRES_X86_AVX2;
10772 for (uint32_t n = 17; n < 32; n++) {
10773 for (size_t k = 1; k <= 5; k += 2) {
10774 GemmMicrokernelTester()
10775 .mr(5)
10776 .nr(16)
10777 .kr(1)
10778 .sr(1)
10779 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010780 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -080010781 .k(k)
10782 .cn_stride(19)
10783 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10784 }
10785 }
10786 }
10787
10788 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, n_gt_16_strided_a) {
10789 TEST_REQUIRES_X86_AVX2;
10790 for (uint32_t n = 17; n < 32; n++) {
10791 for (size_t k = 1; k <= 5; k += 2) {
10792 GemmMicrokernelTester()
10793 .mr(5)
10794 .nr(16)
10795 .kr(1)
10796 .sr(1)
10797 .m(5)
10798 .n(n)
10799 .k(k)
10800 .a_stride(7)
10801 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10802 }
10803 }
10804 }
10805
10806 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, n_gt_16_subtile) {
10807 TEST_REQUIRES_X86_AVX2;
10808 for (uint32_t n = 17; n < 32; n++) {
10809 for (size_t k = 1; k <= 5; k += 2) {
10810 for (uint32_t m = 1; m <= 5; m++) {
10811 GemmMicrokernelTester()
10812 .mr(5)
10813 .nr(16)
10814 .kr(1)
10815 .sr(1)
10816 .m(m)
10817 .n(n)
10818 .k(k)
10819 .iterations(1)
10820 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10821 }
10822 }
10823 }
10824 }
10825
10826 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, n_div_16) {
10827 TEST_REQUIRES_X86_AVX2;
10828 for (uint32_t n = 32; n <= 48; n += 16) {
10829 for (size_t k = 1; k <= 5; k += 2) {
10830 GemmMicrokernelTester()
10831 .mr(5)
10832 .nr(16)
10833 .kr(1)
10834 .sr(1)
10835 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010836 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -080010837 .k(k)
10838 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10839 }
10840 }
10841 }
10842
10843 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, n_div_16_strided_cn) {
10844 TEST_REQUIRES_X86_AVX2;
10845 for (uint32_t n = 32; n <= 48; n += 16) {
10846 for (size_t k = 1; k <= 5; k += 2) {
10847 GemmMicrokernelTester()
10848 .mr(5)
10849 .nr(16)
10850 .kr(1)
10851 .sr(1)
10852 .m(5)
10853 .n(n)
10854 .k(k)
10855 .cn_stride(19)
10856 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10857 }
10858 }
10859 }
10860
10861 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, n_div_16_strided_a) {
10862 TEST_REQUIRES_X86_AVX2;
10863 for (uint32_t n = 32; n <= 48; n += 16) {
10864 for (size_t k = 1; k <= 5; k += 2) {
10865 GemmMicrokernelTester()
10866 .mr(5)
10867 .nr(16)
10868 .kr(1)
10869 .sr(1)
10870 .m(5)
10871 .n(n)
10872 .k(k)
10873 .a_stride(7)
10874 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10875 }
10876 }
10877 }
10878
10879 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, n_div_16_subtile) {
10880 TEST_REQUIRES_X86_AVX2;
10881 for (uint32_t n = 32; n <= 48; n += 16) {
10882 for (size_t k = 1; k <= 5; k += 2) {
10883 for (uint32_t m = 1; m <= 5; m++) {
10884 GemmMicrokernelTester()
10885 .mr(5)
10886 .nr(16)
10887 .kr(1)
10888 .sr(1)
10889 .m(m)
10890 .n(n)
10891 .k(k)
10892 .iterations(1)
10893 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10894 }
10895 }
10896 }
10897 }
10898
10899 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, strided_cm_subtile) {
10900 TEST_REQUIRES_X86_AVX2;
10901 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010902 for (uint32_t n = 1; n <= 16; n++) {
10903 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -080010904 GemmMicrokernelTester()
10905 .mr(5)
10906 .nr(16)
10907 .kr(1)
10908 .sr(1)
10909 .m(m)
10910 .n(n)
10911 .k(k)
10912 .cm_stride(19)
10913 .iterations(1)
10914 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10915 }
10916 }
10917 }
10918 }
10919
10920 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, qmin) {
10921 TEST_REQUIRES_X86_AVX2;
10922 GemmMicrokernelTester()
10923 .mr(5)
10924 .nr(16)
10925 .kr(1)
10926 .sr(1)
10927 .m(5)
10928 .n(16)
10929 .k(1)
10930 .qmin(128)
10931 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10932 }
10933
10934 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, qmax) {
10935 TEST_REQUIRES_X86_AVX2;
10936 GemmMicrokernelTester()
10937 .mr(5)
10938 .nr(16)
10939 .kr(1)
10940 .sr(1)
10941 .m(5)
10942 .n(16)
10943 .k(1)
10944 .qmax(128)
10945 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10946 }
10947
10948 TEST(F16_GEMM_MINMAX_5X16__AVX2_BROADCAST, strided_cm) {
10949 TEST_REQUIRES_X86_AVX2;
10950 GemmMicrokernelTester()
10951 .mr(5)
10952 .nr(16)
10953 .kr(1)
10954 .sr(1)
10955 .m(5)
10956 .n(16)
10957 .k(1)
10958 .cm_stride(19)
10959 .Test(xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
10960 }
10961#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64