blob: d6c6c66bc649202454b377fa7cf0ca0d3c67960d [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f16-igemm-minmax.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
Frank Barchard447aa7b2021-12-28 14:11:40 -080016#include <xnnpack/allocator.h>
Frank Barchardb0e4fae2020-05-04 15:27:51 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
26#if XNN_ARCH_ARM64
27 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_eq_4) {
28 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
29 GemmMicrokernelTester()
30 .mr(1)
31 .nr(8)
32 .kr(1)
33 .sr(1)
34 .m(1)
35 .n(8)
36 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -080037 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -070038 }
39
40 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, strided_cn) {
41 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
42 GemmMicrokernelTester()
43 .mr(1)
44 .nr(8)
45 .kr(1)
46 .sr(1)
47 .m(1)
48 .n(8)
49 .k(4)
50 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -080051 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -070052 }
53
54 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
55 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -080056 for (uint32_t n = 1; n <= 8; n++) {
57 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -070058 GemmMicrokernelTester()
59 .mr(1)
60 .nr(8)
61 .kr(1)
62 .sr(1)
63 .m(m)
64 .n(n)
65 .k(4)
66 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -080067 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -070068 }
69 }
70 }
71
72 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
73 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
74 for (uint32_t m = 1; m <= 1; m++) {
75 GemmMicrokernelTester()
76 .mr(1)
77 .nr(8)
78 .kr(1)
79 .sr(1)
80 .m(m)
81 .n(8)
82 .k(4)
83 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -080084 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -070085 }
86 }
87
88 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
89 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
90 for (uint32_t n = 1; n <= 8; n++) {
91 GemmMicrokernelTester()
92 .mr(1)
93 .nr(8)
94 .kr(1)
95 .sr(1)
96 .m(1)
97 .n(n)
98 .k(4)
99 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800100 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700101 }
102 }
103
104 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_lt_4) {
105 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
106 for (size_t k = 1; k < 4; k++) {
107 GemmMicrokernelTester()
108 .mr(1)
109 .nr(8)
110 .kr(1)
111 .sr(1)
112 .m(1)
113 .n(8)
114 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800115 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700116 }
117 }
118
119 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
120 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
121 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800122 for (uint32_t n = 1; n <= 8; n++) {
123 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700124 GemmMicrokernelTester()
125 .mr(1)
126 .nr(8)
127 .kr(1)
128 .sr(1)
129 .m(m)
130 .n(n)
131 .k(k)
132 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800133 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700134 }
135 }
136 }
137 }
138
139 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_gt_4) {
140 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
141 for (size_t k = 5; k < 8; k++) {
142 GemmMicrokernelTester()
143 .mr(1)
144 .nr(8)
145 .kr(1)
146 .sr(1)
147 .m(1)
148 .n(8)
149 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800150 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700151 }
152 }
153
154 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
155 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
156 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800157 for (uint32_t n = 1; n <= 8; n++) {
158 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700159 GemmMicrokernelTester()
160 .mr(1)
161 .nr(8)
162 .kr(1)
163 .sr(1)
164 .m(m)
165 .n(n)
166 .k(k)
167 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800168 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700169 }
170 }
171 }
172 }
173
174 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_div_4) {
175 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
176 for (size_t k = 8; k <= 40; k += 4) {
177 GemmMicrokernelTester()
178 .mr(1)
179 .nr(8)
180 .kr(1)
181 .sr(1)
182 .m(1)
183 .n(8)
184 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800185 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700186 }
187 }
188
189 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
190 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
191 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800192 for (uint32_t n = 1; n <= 8; n++) {
193 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700194 GemmMicrokernelTester()
195 .mr(1)
196 .nr(8)
197 .kr(1)
198 .sr(1)
199 .m(m)
200 .n(n)
201 .k(k)
202 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800203 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700204 }
205 }
206 }
207 }
208
209 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_gt_8) {
210 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
211 for (uint32_t n = 9; n < 16; n++) {
212 for (size_t k = 1; k <= 20; k += 5) {
213 GemmMicrokernelTester()
214 .mr(1)
215 .nr(8)
216 .kr(1)
217 .sr(1)
218 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800219 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700220 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800221 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700222 }
223 }
224 }
225
226 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
227 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
228 for (uint32_t n = 9; n < 16; n++) {
229 for (size_t k = 1; k <= 20; k += 5) {
230 GemmMicrokernelTester()
231 .mr(1)
232 .nr(8)
233 .kr(1)
234 .sr(1)
235 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800236 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700237 .k(k)
238 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800239 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700240 }
241 }
242 }
243
244 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
245 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
246 for (uint32_t n = 9; n < 16; n++) {
247 for (size_t k = 1; k <= 20; k += 5) {
248 for (uint32_t m = 1; m <= 1; m++) {
249 GemmMicrokernelTester()
250 .mr(1)
251 .nr(8)
252 .kr(1)
253 .sr(1)
254 .m(m)
255 .n(n)
256 .k(k)
257 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800258 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700259 }
260 }
261 }
262 }
263
264 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_div_8) {
265 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
266 for (uint32_t n = 16; n <= 24; n += 8) {
267 for (size_t k = 1; k <= 20; k += 5) {
268 GemmMicrokernelTester()
269 .mr(1)
270 .nr(8)
271 .kr(1)
272 .sr(1)
273 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800274 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700275 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800276 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700277 }
278 }
279 }
280
281 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
282 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
283 for (uint32_t n = 16; n <= 24; n += 8) {
284 for (size_t k = 1; k <= 20; k += 5) {
285 GemmMicrokernelTester()
286 .mr(1)
287 .nr(8)
288 .kr(1)
289 .sr(1)
290 .m(1)
291 .n(n)
292 .k(k)
293 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800294 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700295 }
296 }
297 }
298
299 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
300 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
301 for (uint32_t n = 16; n <= 24; n += 8) {
302 for (size_t k = 1; k <= 20; k += 5) {
303 for (uint32_t m = 1; m <= 1; m++) {
304 GemmMicrokernelTester()
305 .mr(1)
306 .nr(8)
307 .kr(1)
308 .sr(1)
309 .m(m)
310 .n(n)
311 .k(k)
312 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800313 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700314 }
315 }
316 }
317 }
318
319 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, small_kernel) {
320 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
321 for (size_t k = 1; k <= 20; k += 5) {
322 GemmMicrokernelTester()
323 .mr(1)
324 .nr(8)
325 .kr(1)
326 .sr(1)
327 .m(1)
328 .n(8)
329 .k(k)
330 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800331 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700332 }
333 }
334
335 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, small_kernel_subtile) {
336 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
337 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800338 for (uint32_t n = 1; n <= 8; n++) {
339 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700340 GemmMicrokernelTester()
341 .mr(1)
342 .nr(8)
343 .kr(1)
344 .sr(1)
345 .m(m)
346 .n(n)
347 .k(k)
348 .ks(3)
349 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800350 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700351 }
352 }
353 }
354 }
355
356 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_gt_8_small_kernel) {
357 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
358 for (uint32_t n = 9; n < 16; n++) {
359 for (size_t k = 1; k <= 20; k += 5) {
360 GemmMicrokernelTester()
361 .mr(1)
362 .nr(8)
363 .kr(1)
364 .sr(1)
365 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800366 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700367 .k(k)
368 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800369 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700370 }
371 }
372 }
373
374 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, n_div_8_small_kernel) {
375 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
376 for (uint32_t n = 16; n <= 24; n += 8) {
377 for (size_t k = 1; k <= 20; k += 5) {
378 GemmMicrokernelTester()
379 .mr(1)
380 .nr(8)
381 .kr(1)
382 .sr(1)
383 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800384 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700385 .k(k)
386 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800387 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700388 }
389 }
390 }
391
392 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
393 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
394 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800395 for (uint32_t n = 1; n <= 8; n++) {
396 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700397 GemmMicrokernelTester()
398 .mr(1)
399 .nr(8)
400 .kr(1)
401 .sr(1)
402 .m(m)
403 .n(n)
404 .k(k)
405 .cm_stride(11)
406 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800407 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700408 }
409 }
410 }
411 }
412
413 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, a_offset) {
414 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
415 for (size_t k = 1; k <= 20; k += 5) {
416 GemmMicrokernelTester()
417 .mr(1)
418 .nr(8)
419 .kr(1)
420 .sr(1)
421 .m(1)
422 .n(8)
423 .k(k)
424 .ks(3)
425 .a_offset(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800426 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700427 }
428 }
429
430 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, zero) {
431 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800432 for (size_t k = 1; k <= 20; k += 5) {
433 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700434 GemmMicrokernelTester()
435 .mr(1)
436 .nr(8)
437 .kr(1)
438 .sr(1)
439 .m(1)
440 .n(8)
441 .k(k)
442 .ks(3)
443 .a_offset(23)
444 .zero_index(mz)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800445 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700446 }
447 }
448 }
449
450 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, qmin) {
451 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
452 GemmMicrokernelTester()
453 .mr(1)
454 .nr(8)
455 .kr(1)
456 .sr(1)
457 .m(1)
458 .n(8)
459 .k(4)
460 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800461 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700462 }
463
464 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, qmax) {
465 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
466 GemmMicrokernelTester()
467 .mr(1)
468 .nr(8)
469 .kr(1)
470 .sr(1)
471 .m(1)
472 .n(8)
473 .k(4)
474 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800475 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700476 }
477
478 TEST(F16_IGEMM_MINMAX_1X8__NEONFP16ARITH_LD64, strided_cm) {
479 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
480 GemmMicrokernelTester()
481 .mr(1)
482 .nr(8)
483 .kr(1)
484 .sr(1)
485 .m(1)
486 .n(8)
487 .k(4)
488 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800489 .Test(xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700490 }
491#endif // XNN_ARCH_ARM64
492
493
494#if XNN_ARCH_ARM64
495 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4) {
496 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
497 GemmMicrokernelTester()
498 .mr(4)
499 .nr(8)
500 .kr(1)
501 .sr(1)
502 .m(4)
503 .n(8)
504 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800505 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700506 }
507
508 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, strided_cn) {
509 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
510 GemmMicrokernelTester()
511 .mr(4)
512 .nr(8)
513 .kr(1)
514 .sr(1)
515 .m(4)
516 .n(8)
517 .k(4)
518 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800519 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700520 }
521
522 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
523 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800524 for (uint32_t n = 1; n <= 8; n++) {
525 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700526 GemmMicrokernelTester()
527 .mr(4)
528 .nr(8)
529 .kr(1)
530 .sr(1)
531 .m(m)
532 .n(n)
533 .k(4)
534 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800535 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700536 }
537 }
538 }
539
540 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
541 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
542 for (uint32_t m = 1; m <= 4; m++) {
543 GemmMicrokernelTester()
544 .mr(4)
545 .nr(8)
546 .kr(1)
547 .sr(1)
548 .m(m)
549 .n(8)
550 .k(4)
551 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800552 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700553 }
554 }
555
556 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
557 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
558 for (uint32_t n = 1; n <= 8; n++) {
559 GemmMicrokernelTester()
560 .mr(4)
561 .nr(8)
562 .kr(1)
563 .sr(1)
564 .m(4)
565 .n(n)
566 .k(4)
567 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800568 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700569 }
570 }
571
572 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_lt_4) {
573 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
574 for (size_t k = 1; k < 4; k++) {
575 GemmMicrokernelTester()
576 .mr(4)
577 .nr(8)
578 .kr(1)
579 .sr(1)
580 .m(4)
581 .n(8)
582 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800583 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700584 }
585 }
586
587 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
588 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
589 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800590 for (uint32_t n = 1; n <= 8; n++) {
591 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700592 GemmMicrokernelTester()
593 .mr(4)
594 .nr(8)
595 .kr(1)
596 .sr(1)
597 .m(m)
598 .n(n)
599 .k(k)
600 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800601 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700602 }
603 }
604 }
605 }
606
607 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_gt_4) {
608 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
609 for (size_t k = 5; k < 8; k++) {
610 GemmMicrokernelTester()
611 .mr(4)
612 .nr(8)
613 .kr(1)
614 .sr(1)
615 .m(4)
616 .n(8)
617 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800618 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700619 }
620 }
621
622 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
623 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
624 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800625 for (uint32_t n = 1; n <= 8; n++) {
626 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700627 GemmMicrokernelTester()
628 .mr(4)
629 .nr(8)
630 .kr(1)
631 .sr(1)
632 .m(m)
633 .n(n)
634 .k(k)
635 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800636 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700637 }
638 }
639 }
640 }
641
642 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_div_4) {
643 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
644 for (size_t k = 8; k <= 40; k += 4) {
645 GemmMicrokernelTester()
646 .mr(4)
647 .nr(8)
648 .kr(1)
649 .sr(1)
650 .m(4)
651 .n(8)
652 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800653 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700654 }
655 }
656
657 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
658 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
659 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800660 for (uint32_t n = 1; n <= 8; n++) {
661 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700662 GemmMicrokernelTester()
663 .mr(4)
664 .nr(8)
665 .kr(1)
666 .sr(1)
667 .m(m)
668 .n(n)
669 .k(k)
670 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800671 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700672 }
673 }
674 }
675 }
676
677 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8) {
678 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
679 for (uint32_t n = 9; n < 16; n++) {
680 for (size_t k = 1; k <= 20; k += 5) {
681 GemmMicrokernelTester()
682 .mr(4)
683 .nr(8)
684 .kr(1)
685 .sr(1)
686 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800687 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700688 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800689 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700690 }
691 }
692 }
693
694 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
695 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
696 for (uint32_t n = 9; n < 16; n++) {
697 for (size_t k = 1; k <= 20; k += 5) {
698 GemmMicrokernelTester()
699 .mr(4)
700 .nr(8)
701 .kr(1)
702 .sr(1)
703 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800704 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700705 .k(k)
706 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800707 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700708 }
709 }
710 }
711
712 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
713 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
714 for (uint32_t n = 9; n < 16; n++) {
715 for (size_t k = 1; k <= 20; k += 5) {
716 for (uint32_t m = 1; m <= 4; m++) {
717 GemmMicrokernelTester()
718 .mr(4)
719 .nr(8)
720 .kr(1)
721 .sr(1)
722 .m(m)
723 .n(n)
724 .k(k)
725 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800726 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700727 }
728 }
729 }
730 }
731
732 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8) {
733 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
734 for (uint32_t n = 16; n <= 24; n += 8) {
735 for (size_t k = 1; k <= 20; k += 5) {
736 GemmMicrokernelTester()
737 .mr(4)
738 .nr(8)
739 .kr(1)
740 .sr(1)
741 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800742 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700743 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800744 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700745 }
746 }
747 }
748
749 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
750 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
751 for (uint32_t n = 16; n <= 24; n += 8) {
752 for (size_t k = 1; k <= 20; k += 5) {
753 GemmMicrokernelTester()
754 .mr(4)
755 .nr(8)
756 .kr(1)
757 .sr(1)
758 .m(4)
759 .n(n)
760 .k(k)
761 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800762 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700763 }
764 }
765 }
766
767 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
768 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
769 for (uint32_t n = 16; n <= 24; n += 8) {
770 for (size_t k = 1; k <= 20; k += 5) {
771 for (uint32_t m = 1; m <= 4; m++) {
772 GemmMicrokernelTester()
773 .mr(4)
774 .nr(8)
775 .kr(1)
776 .sr(1)
777 .m(m)
778 .n(n)
779 .k(k)
780 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800781 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700782 }
783 }
784 }
785 }
786
787 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, small_kernel) {
788 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
789 for (size_t k = 1; k <= 20; k += 5) {
790 GemmMicrokernelTester()
791 .mr(4)
792 .nr(8)
793 .kr(1)
794 .sr(1)
795 .m(4)
796 .n(8)
797 .k(k)
798 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800799 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700800 }
801 }
802
803 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, small_kernel_subtile) {
804 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
805 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800806 for (uint32_t n = 1; n <= 8; n++) {
807 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700808 GemmMicrokernelTester()
809 .mr(4)
810 .nr(8)
811 .kr(1)
812 .sr(1)
813 .m(m)
814 .n(n)
815 .k(k)
816 .ks(3)
817 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800818 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700819 }
820 }
821 }
822 }
823
824 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_gt_8_small_kernel) {
825 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
826 for (uint32_t n = 9; n < 16; n++) {
827 for (size_t k = 1; k <= 20; k += 5) {
828 GemmMicrokernelTester()
829 .mr(4)
830 .nr(8)
831 .kr(1)
832 .sr(1)
833 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800834 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700835 .k(k)
836 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800837 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700838 }
839 }
840 }
841
842 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, n_div_8_small_kernel) {
843 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
844 for (uint32_t n = 16; n <= 24; n += 8) {
845 for (size_t k = 1; k <= 20; k += 5) {
846 GemmMicrokernelTester()
847 .mr(4)
848 .nr(8)
849 .kr(1)
850 .sr(1)
851 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800852 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700853 .k(k)
854 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800855 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700856 }
857 }
858 }
859
860 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
861 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
862 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800863 for (uint32_t n = 1; n <= 8; n++) {
864 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700865 GemmMicrokernelTester()
866 .mr(4)
867 .nr(8)
868 .kr(1)
869 .sr(1)
870 .m(m)
871 .n(n)
872 .k(k)
873 .cm_stride(11)
874 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800875 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700876 }
877 }
878 }
879 }
880
881 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, a_offset) {
882 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
883 for (size_t k = 1; k <= 20; k += 5) {
884 GemmMicrokernelTester()
885 .mr(4)
886 .nr(8)
887 .kr(1)
888 .sr(1)
889 .m(4)
890 .n(8)
891 .k(k)
892 .ks(3)
893 .a_offset(83)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800894 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700895 }
896 }
897
898 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, zero) {
899 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800900 for (size_t k = 1; k <= 20; k += 5) {
901 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700902 GemmMicrokernelTester()
903 .mr(4)
904 .nr(8)
905 .kr(1)
906 .sr(1)
907 .m(4)
908 .n(8)
909 .k(k)
910 .ks(3)
911 .a_offset(83)
912 .zero_index(mz)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800913 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700914 }
915 }
916 }
917
918 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, qmin) {
919 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
920 GemmMicrokernelTester()
921 .mr(4)
922 .nr(8)
923 .kr(1)
924 .sr(1)
925 .m(4)
926 .n(8)
927 .k(4)
928 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800929 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700930 }
931
932 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, qmax) {
933 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
934 GemmMicrokernelTester()
935 .mr(4)
936 .nr(8)
937 .kr(1)
938 .sr(1)
939 .m(4)
940 .n(8)
941 .k(4)
942 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800943 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700944 }
945
946 TEST(F16_IGEMM_MINMAX_4X8__NEONFP16ARITH_LD64, strided_cm) {
947 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
948 GemmMicrokernelTester()
949 .mr(4)
950 .nr(8)
951 .kr(1)
952 .sr(1)
953 .m(4)
954 .n(8)
955 .k(4)
956 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800957 .Test(xnn_f16_igemm_minmax_ukernel_4x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700958 }
959#endif // XNN_ARCH_ARM64
960
961
962#if XNN_ARCH_ARM64
963 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4) {
964 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
965 GemmMicrokernelTester()
966 .mr(6)
967 .nr(8)
968 .kr(1)
969 .sr(1)
970 .m(6)
971 .n(8)
972 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800973 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700974 }
975
976 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, strided_cn) {
977 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
978 GemmMicrokernelTester()
979 .mr(6)
980 .nr(8)
981 .kr(1)
982 .sr(1)
983 .m(6)
984 .n(8)
985 .k(4)
986 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -0800987 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700988 }
989
990 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
991 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800992 for (uint32_t n = 1; n <= 8; n++) {
993 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -0700994 GemmMicrokernelTester()
995 .mr(6)
996 .nr(8)
997 .kr(1)
998 .sr(1)
999 .m(m)
1000 .n(n)
1001 .k(4)
1002 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001003 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001004 }
1005 }
1006 }
1007
1008 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
1009 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1010 for (uint32_t m = 1; m <= 6; m++) {
1011 GemmMicrokernelTester()
1012 .mr(6)
1013 .nr(8)
1014 .kr(1)
1015 .sr(1)
1016 .m(m)
1017 .n(8)
1018 .k(4)
1019 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001020 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001021 }
1022 }
1023
1024 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
1025 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1026 for (uint32_t n = 1; n <= 8; n++) {
1027 GemmMicrokernelTester()
1028 .mr(6)
1029 .nr(8)
1030 .kr(1)
1031 .sr(1)
1032 .m(6)
1033 .n(n)
1034 .k(4)
1035 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001036 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001037 }
1038 }
1039
1040 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_lt_4) {
1041 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1042 for (size_t k = 1; k < 4; k++) {
1043 GemmMicrokernelTester()
1044 .mr(6)
1045 .nr(8)
1046 .kr(1)
1047 .sr(1)
1048 .m(6)
1049 .n(8)
1050 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001051 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001052 }
1053 }
1054
1055 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
1056 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1057 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001058 for (uint32_t n = 1; n <= 8; n++) {
1059 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001060 GemmMicrokernelTester()
1061 .mr(6)
1062 .nr(8)
1063 .kr(1)
1064 .sr(1)
1065 .m(m)
1066 .n(n)
1067 .k(k)
1068 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001069 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001070 }
1071 }
1072 }
1073 }
1074
1075 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_gt_4) {
1076 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1077 for (size_t k = 5; k < 8; k++) {
1078 GemmMicrokernelTester()
1079 .mr(6)
1080 .nr(8)
1081 .kr(1)
1082 .sr(1)
1083 .m(6)
1084 .n(8)
1085 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001086 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001087 }
1088 }
1089
1090 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
1091 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1092 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001093 for (uint32_t n = 1; n <= 8; n++) {
1094 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001095 GemmMicrokernelTester()
1096 .mr(6)
1097 .nr(8)
1098 .kr(1)
1099 .sr(1)
1100 .m(m)
1101 .n(n)
1102 .k(k)
1103 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001104 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001105 }
1106 }
1107 }
1108 }
1109
1110 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_div_4) {
1111 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1112 for (size_t k = 8; k <= 40; k += 4) {
1113 GemmMicrokernelTester()
1114 .mr(6)
1115 .nr(8)
1116 .kr(1)
1117 .sr(1)
1118 .m(6)
1119 .n(8)
1120 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001121 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001122 }
1123 }
1124
1125 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
1126 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1127 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001128 for (uint32_t n = 1; n <= 8; n++) {
1129 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001130 GemmMicrokernelTester()
1131 .mr(6)
1132 .nr(8)
1133 .kr(1)
1134 .sr(1)
1135 .m(m)
1136 .n(n)
1137 .k(k)
1138 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001139 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001140 }
1141 }
1142 }
1143 }
1144
1145 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8) {
1146 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1147 for (uint32_t n = 9; n < 16; n++) {
1148 for (size_t k = 1; k <= 20; k += 5) {
1149 GemmMicrokernelTester()
1150 .mr(6)
1151 .nr(8)
1152 .kr(1)
1153 .sr(1)
1154 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001155 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001156 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001157 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001158 }
1159 }
1160 }
1161
1162 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
1163 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1164 for (uint32_t n = 9; n < 16; n++) {
1165 for (size_t k = 1; k <= 20; k += 5) {
1166 GemmMicrokernelTester()
1167 .mr(6)
1168 .nr(8)
1169 .kr(1)
1170 .sr(1)
1171 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001172 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001173 .k(k)
1174 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001175 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001176 }
1177 }
1178 }
1179
1180 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
1181 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1182 for (uint32_t n = 9; n < 16; n++) {
1183 for (size_t k = 1; k <= 20; k += 5) {
1184 for (uint32_t m = 1; m <= 6; m++) {
1185 GemmMicrokernelTester()
1186 .mr(6)
1187 .nr(8)
1188 .kr(1)
1189 .sr(1)
1190 .m(m)
1191 .n(n)
1192 .k(k)
1193 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001194 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001195 }
1196 }
1197 }
1198 }
1199
1200 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8) {
1201 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1202 for (uint32_t n = 16; n <= 24; n += 8) {
1203 for (size_t k = 1; k <= 20; k += 5) {
1204 GemmMicrokernelTester()
1205 .mr(6)
1206 .nr(8)
1207 .kr(1)
1208 .sr(1)
1209 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001210 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001211 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001212 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001213 }
1214 }
1215 }
1216
1217 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
1218 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1219 for (uint32_t n = 16; n <= 24; n += 8) {
1220 for (size_t k = 1; k <= 20; k += 5) {
1221 GemmMicrokernelTester()
1222 .mr(6)
1223 .nr(8)
1224 .kr(1)
1225 .sr(1)
1226 .m(6)
1227 .n(n)
1228 .k(k)
1229 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001230 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001231 }
1232 }
1233 }
1234
1235 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
1236 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1237 for (uint32_t n = 16; n <= 24; n += 8) {
1238 for (size_t k = 1; k <= 20; k += 5) {
1239 for (uint32_t m = 1; m <= 6; m++) {
1240 GemmMicrokernelTester()
1241 .mr(6)
1242 .nr(8)
1243 .kr(1)
1244 .sr(1)
1245 .m(m)
1246 .n(n)
1247 .k(k)
1248 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001249 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001250 }
1251 }
1252 }
1253 }
1254
1255 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, small_kernel) {
1256 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1257 for (size_t k = 1; k <= 20; k += 5) {
1258 GemmMicrokernelTester()
1259 .mr(6)
1260 .nr(8)
1261 .kr(1)
1262 .sr(1)
1263 .m(6)
1264 .n(8)
1265 .k(k)
1266 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001267 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001268 }
1269 }
1270
1271 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, small_kernel_subtile) {
1272 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1273 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001274 for (uint32_t n = 1; n <= 8; n++) {
1275 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001276 GemmMicrokernelTester()
1277 .mr(6)
1278 .nr(8)
1279 .kr(1)
1280 .sr(1)
1281 .m(m)
1282 .n(n)
1283 .k(k)
1284 .ks(3)
1285 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001286 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001287 }
1288 }
1289 }
1290 }
1291
1292 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_gt_8_small_kernel) {
1293 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1294 for (uint32_t n = 9; n < 16; n++) {
1295 for (size_t k = 1; k <= 20; k += 5) {
1296 GemmMicrokernelTester()
1297 .mr(6)
1298 .nr(8)
1299 .kr(1)
1300 .sr(1)
1301 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001302 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001303 .k(k)
1304 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001305 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001306 }
1307 }
1308 }
1309
1310 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, n_div_8_small_kernel) {
1311 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1312 for (uint32_t n = 16; n <= 24; n += 8) {
1313 for (size_t k = 1; k <= 20; k += 5) {
1314 GemmMicrokernelTester()
1315 .mr(6)
1316 .nr(8)
1317 .kr(1)
1318 .sr(1)
1319 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001320 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001321 .k(k)
1322 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001323 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001324 }
1325 }
1326 }
1327
1328 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
1329 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1330 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001331 for (uint32_t n = 1; n <= 8; n++) {
1332 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001333 GemmMicrokernelTester()
1334 .mr(6)
1335 .nr(8)
1336 .kr(1)
1337 .sr(1)
1338 .m(m)
1339 .n(n)
1340 .k(k)
1341 .cm_stride(11)
1342 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001343 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001344 }
1345 }
1346 }
1347 }
1348
1349 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, a_offset) {
1350 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1351 for (size_t k = 1; k <= 20; k += 5) {
1352 GemmMicrokernelTester()
1353 .mr(6)
1354 .nr(8)
1355 .kr(1)
1356 .sr(1)
1357 .m(6)
1358 .n(8)
1359 .k(k)
1360 .ks(3)
1361 .a_offset(127)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001362 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001363 }
1364 }
1365
1366 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, zero) {
1367 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001368 for (size_t k = 1; k <= 20; k += 5) {
1369 for (uint32_t mz = 0; mz < 6; mz++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001370 GemmMicrokernelTester()
1371 .mr(6)
1372 .nr(8)
1373 .kr(1)
1374 .sr(1)
1375 .m(6)
1376 .n(8)
1377 .k(k)
1378 .ks(3)
1379 .a_offset(127)
1380 .zero_index(mz)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001381 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001382 }
1383 }
1384 }
1385
1386 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, qmin) {
1387 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1388 GemmMicrokernelTester()
1389 .mr(6)
1390 .nr(8)
1391 .kr(1)
1392 .sr(1)
1393 .m(6)
1394 .n(8)
1395 .k(4)
1396 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001397 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001398 }
1399
1400 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, qmax) {
1401 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1402 GemmMicrokernelTester()
1403 .mr(6)
1404 .nr(8)
1405 .kr(1)
1406 .sr(1)
1407 .m(6)
1408 .n(8)
1409 .k(4)
1410 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001411 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001412 }
1413
1414 TEST(F16_IGEMM_MINMAX_6X8__NEONFP16ARITH_LD64, strided_cm) {
1415 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1416 GemmMicrokernelTester()
1417 .mr(6)
1418 .nr(8)
1419 .kr(1)
1420 .sr(1)
1421 .m(6)
1422 .n(8)
1423 .k(4)
1424 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001425 .Test(xnn_f16_igemm_minmax_ukernel_6x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001426 }
1427#endif // XNN_ARCH_ARM64
1428
1429
1430#if XNN_ARCH_ARM64
1431 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4) {
1432 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1433 GemmMicrokernelTester()
1434 .mr(8)
1435 .nr(8)
1436 .kr(1)
1437 .sr(1)
1438 .m(8)
1439 .n(8)
1440 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001441 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001442 }
1443
1444 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, strided_cn) {
1445 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1446 GemmMicrokernelTester()
1447 .mr(8)
1448 .nr(8)
1449 .kr(1)
1450 .sr(1)
1451 .m(8)
1452 .n(8)
1453 .k(4)
1454 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001455 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001456 }
1457
1458 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile) {
1459 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001460 for (uint32_t n = 1; n <= 8; n++) {
1461 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001462 GemmMicrokernelTester()
1463 .mr(8)
1464 .nr(8)
1465 .kr(1)
1466 .sr(1)
1467 .m(m)
1468 .n(n)
1469 .k(4)
1470 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001471 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001472 }
1473 }
1474 }
1475
1476 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
1477 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1478 for (uint32_t m = 1; m <= 8; m++) {
1479 GemmMicrokernelTester()
1480 .mr(8)
1481 .nr(8)
1482 .kr(1)
1483 .sr(1)
1484 .m(m)
1485 .n(8)
1486 .k(4)
1487 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001488 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001489 }
1490 }
1491
1492 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
1493 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1494 for (uint32_t n = 1; n <= 8; n++) {
1495 GemmMicrokernelTester()
1496 .mr(8)
1497 .nr(8)
1498 .kr(1)
1499 .sr(1)
1500 .m(8)
1501 .n(n)
1502 .k(4)
1503 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001504 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001505 }
1506 }
1507
1508 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_lt_4) {
1509 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1510 for (size_t k = 1; k < 4; k++) {
1511 GemmMicrokernelTester()
1512 .mr(8)
1513 .nr(8)
1514 .kr(1)
1515 .sr(1)
1516 .m(8)
1517 .n(8)
1518 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001519 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001520 }
1521 }
1522
1523 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_lt_4_subtile) {
1524 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1525 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001526 for (uint32_t n = 1; n <= 8; n++) {
1527 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001528 GemmMicrokernelTester()
1529 .mr(8)
1530 .nr(8)
1531 .kr(1)
1532 .sr(1)
1533 .m(m)
1534 .n(n)
1535 .k(k)
1536 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001537 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001538 }
1539 }
1540 }
1541 }
1542
1543 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_gt_4) {
1544 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1545 for (size_t k = 5; k < 8; k++) {
1546 GemmMicrokernelTester()
1547 .mr(8)
1548 .nr(8)
1549 .kr(1)
1550 .sr(1)
1551 .m(8)
1552 .n(8)
1553 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001554 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001555 }
1556 }
1557
1558 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
1559 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1560 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001561 for (uint32_t n = 1; n <= 8; n++) {
1562 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001563 GemmMicrokernelTester()
1564 .mr(8)
1565 .nr(8)
1566 .kr(1)
1567 .sr(1)
1568 .m(m)
1569 .n(n)
1570 .k(k)
1571 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001572 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001573 }
1574 }
1575 }
1576 }
1577
1578 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_div_4) {
1579 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1580 for (size_t k = 8; k <= 40; k += 4) {
1581 GemmMicrokernelTester()
1582 .mr(8)
1583 .nr(8)
1584 .kr(1)
1585 .sr(1)
1586 .m(8)
1587 .n(8)
1588 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001589 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001590 }
1591 }
1592
1593 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, k_div_4_subtile) {
1594 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1595 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001596 for (uint32_t n = 1; n <= 8; n++) {
1597 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001598 GemmMicrokernelTester()
1599 .mr(8)
1600 .nr(8)
1601 .kr(1)
1602 .sr(1)
1603 .m(m)
1604 .n(n)
1605 .k(k)
1606 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001607 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001608 }
1609 }
1610 }
1611 }
1612
1613 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8) {
1614 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1615 for (uint32_t n = 9; n < 16; n++) {
1616 for (size_t k = 1; k <= 20; k += 5) {
1617 GemmMicrokernelTester()
1618 .mr(8)
1619 .nr(8)
1620 .kr(1)
1621 .sr(1)
1622 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001623 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001624 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001625 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001626 }
1627 }
1628 }
1629
1630 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) {
1631 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1632 for (uint32_t n = 9; n < 16; n++) {
1633 for (size_t k = 1; k <= 20; k += 5) {
1634 GemmMicrokernelTester()
1635 .mr(8)
1636 .nr(8)
1637 .kr(1)
1638 .sr(1)
1639 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001640 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001641 .k(k)
1642 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001643 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001644 }
1645 }
1646 }
1647
1648 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8_subtile) {
1649 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1650 for (uint32_t n = 9; n < 16; n++) {
1651 for (size_t k = 1; k <= 20; k += 5) {
1652 for (uint32_t m = 1; m <= 8; m++) {
1653 GemmMicrokernelTester()
1654 .mr(8)
1655 .nr(8)
1656 .kr(1)
1657 .sr(1)
1658 .m(m)
1659 .n(n)
1660 .k(k)
1661 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001662 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001663 }
1664 }
1665 }
1666 }
1667
1668 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8) {
1669 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1670 for (uint32_t n = 16; n <= 24; n += 8) {
1671 for (size_t k = 1; k <= 20; k += 5) {
1672 GemmMicrokernelTester()
1673 .mr(8)
1674 .nr(8)
1675 .kr(1)
1676 .sr(1)
1677 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001678 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001679 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001680 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001681 }
1682 }
1683 }
1684
1685 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) {
1686 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1687 for (uint32_t n = 16; n <= 24; n += 8) {
1688 for (size_t k = 1; k <= 20; k += 5) {
1689 GemmMicrokernelTester()
1690 .mr(8)
1691 .nr(8)
1692 .kr(1)
1693 .sr(1)
1694 .m(8)
1695 .n(n)
1696 .k(k)
1697 .cn_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001698 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001699 }
1700 }
1701 }
1702
1703 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8_subtile) {
1704 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1705 for (uint32_t n = 16; n <= 24; n += 8) {
1706 for (size_t k = 1; k <= 20; k += 5) {
1707 for (uint32_t m = 1; m <= 8; m++) {
1708 GemmMicrokernelTester()
1709 .mr(8)
1710 .nr(8)
1711 .kr(1)
1712 .sr(1)
1713 .m(m)
1714 .n(n)
1715 .k(k)
1716 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001717 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001718 }
1719 }
1720 }
1721 }
1722
1723 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, small_kernel) {
1724 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1725 for (size_t k = 1; k <= 20; k += 5) {
1726 GemmMicrokernelTester()
1727 .mr(8)
1728 .nr(8)
1729 .kr(1)
1730 .sr(1)
1731 .m(8)
1732 .n(8)
1733 .k(k)
1734 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001735 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001736 }
1737 }
1738
1739 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, small_kernel_subtile) {
1740 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1741 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001742 for (uint32_t n = 1; n <= 8; n++) {
1743 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001744 GemmMicrokernelTester()
1745 .mr(8)
1746 .nr(8)
1747 .kr(1)
1748 .sr(1)
1749 .m(m)
1750 .n(n)
1751 .k(k)
1752 .ks(3)
1753 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001754 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001755 }
1756 }
1757 }
1758 }
1759
1760 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_gt_8_small_kernel) {
1761 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1762 for (uint32_t n = 9; n < 16; n++) {
1763 for (size_t k = 1; k <= 20; k += 5) {
1764 GemmMicrokernelTester()
1765 .mr(8)
1766 .nr(8)
1767 .kr(1)
1768 .sr(1)
1769 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001770 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001771 .k(k)
1772 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001773 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001774 }
1775 }
1776 }
1777
1778 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, n_div_8_small_kernel) {
1779 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1780 for (uint32_t n = 16; n <= 24; n += 8) {
1781 for (size_t k = 1; k <= 20; k += 5) {
1782 GemmMicrokernelTester()
1783 .mr(8)
1784 .nr(8)
1785 .kr(1)
1786 .sr(1)
1787 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001788 .n(n)
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001789 .k(k)
1790 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001791 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001792 }
1793 }
1794 }
1795
1796 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, strided_cm_subtile) {
1797 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1798 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001799 for (uint32_t n = 1; n <= 8; n++) {
1800 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001801 GemmMicrokernelTester()
1802 .mr(8)
1803 .nr(8)
1804 .kr(1)
1805 .sr(1)
1806 .m(m)
1807 .n(n)
1808 .k(k)
1809 .cm_stride(11)
1810 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001811 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001812 }
1813 }
1814 }
1815 }
1816
1817 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, a_offset) {
1818 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1819 for (size_t k = 1; k <= 20; k += 5) {
1820 GemmMicrokernelTester()
1821 .mr(8)
1822 .nr(8)
1823 .kr(1)
1824 .sr(1)
1825 .m(8)
1826 .n(8)
1827 .k(k)
1828 .ks(3)
1829 .a_offset(163)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001830 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001831 }
1832 }
1833
1834 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, zero) {
1835 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001836 for (size_t k = 1; k <= 20; k += 5) {
1837 for (uint32_t mz = 0; mz < 8; mz++) {
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001838 GemmMicrokernelTester()
1839 .mr(8)
1840 .nr(8)
1841 .kr(1)
1842 .sr(1)
1843 .m(8)
1844 .n(8)
1845 .k(k)
1846 .ks(3)
1847 .a_offset(163)
1848 .zero_index(mz)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001849 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001850 }
1851 }
1852 }
1853
1854 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, qmin) {
1855 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1856 GemmMicrokernelTester()
1857 .mr(8)
1858 .nr(8)
1859 .kr(1)
1860 .sr(1)
1861 .m(8)
1862 .n(8)
1863 .k(4)
1864 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001865 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001866 }
1867
1868 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, qmax) {
1869 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1870 GemmMicrokernelTester()
1871 .mr(8)
1872 .nr(8)
1873 .kr(1)
1874 .sr(1)
1875 .m(8)
1876 .n(8)
1877 .k(4)
1878 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001879 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001880 }
1881
1882 TEST(F16_IGEMM_MINMAX_8X8__NEONFP16ARITH_LD64, strided_cm) {
1883 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1884 GemmMicrokernelTester()
1885 .mr(8)
1886 .nr(8)
1887 .kr(1)
1888 .sr(1)
1889 .m(8)
1890 .n(8)
1891 .k(4)
1892 .cm_stride(11)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001893 .Test(xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchardb0e4fae2020-05-04 15:27:51 -07001894 }
1895#endif // XNN_ARCH_ARM64
Frank Barchard3f9f99f2020-05-06 01:12:04 -07001896
1897
1898#if XNN_ARCH_ARM64
1899 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_eq_4) {
1900 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1901 GemmMicrokernelTester()
1902 .mr(1)
1903 .nr(16)
1904 .kr(1)
1905 .sr(1)
1906 .m(1)
1907 .n(16)
1908 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001909 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07001910 }
1911
1912 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, strided_cn) {
1913 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1914 GemmMicrokernelTester()
1915 .mr(1)
1916 .nr(16)
1917 .kr(1)
1918 .sr(1)
1919 .m(1)
1920 .n(16)
1921 .k(4)
1922 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001923 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07001924 }
1925
1926 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_eq_4_subtile) {
1927 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001928 for (uint32_t n = 1; n <= 16; n++) {
1929 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07001930 GemmMicrokernelTester()
1931 .mr(1)
1932 .nr(16)
1933 .kr(1)
1934 .sr(1)
1935 .m(m)
1936 .n(n)
1937 .k(4)
1938 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001939 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07001940 }
1941 }
1942 }
1943
1944 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
1945 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1946 for (uint32_t m = 1; m <= 1; m++) {
1947 GemmMicrokernelTester()
1948 .mr(1)
1949 .nr(16)
1950 .kr(1)
1951 .sr(1)
1952 .m(m)
1953 .n(16)
1954 .k(4)
1955 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001956 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07001957 }
1958 }
1959
1960 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
1961 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1962 for (uint32_t n = 1; n <= 16; n++) {
1963 GemmMicrokernelTester()
1964 .mr(1)
1965 .nr(16)
1966 .kr(1)
1967 .sr(1)
1968 .m(1)
1969 .n(n)
1970 .k(4)
1971 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001972 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07001973 }
1974 }
1975
1976 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_lt_4) {
1977 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1978 for (size_t k = 1; k < 4; k++) {
1979 GemmMicrokernelTester()
1980 .mr(1)
1981 .nr(16)
1982 .kr(1)
1983 .sr(1)
1984 .m(1)
1985 .n(16)
1986 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08001987 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07001988 }
1989 }
1990
1991 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_lt_4_subtile) {
1992 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
1993 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001994 for (uint32_t n = 1; n <= 16; n++) {
1995 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07001996 GemmMicrokernelTester()
1997 .mr(1)
1998 .nr(16)
1999 .kr(1)
2000 .sr(1)
2001 .m(m)
2002 .n(n)
2003 .k(k)
2004 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002005 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002006 }
2007 }
2008 }
2009 }
2010
2011 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_gt_4) {
2012 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2013 for (size_t k = 5; k < 8; k++) {
2014 GemmMicrokernelTester()
2015 .mr(1)
2016 .nr(16)
2017 .kr(1)
2018 .sr(1)
2019 .m(1)
2020 .n(16)
2021 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002022 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002023 }
2024 }
2025
2026 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_gt_4_subtile) {
2027 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2028 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002029 for (uint32_t n = 1; n <= 16; n++) {
2030 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002031 GemmMicrokernelTester()
2032 .mr(1)
2033 .nr(16)
2034 .kr(1)
2035 .sr(1)
2036 .m(m)
2037 .n(n)
2038 .k(k)
2039 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002040 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002041 }
2042 }
2043 }
2044 }
2045
2046 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_div_4) {
2047 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2048 for (size_t k = 8; k <= 40; k += 4) {
2049 GemmMicrokernelTester()
2050 .mr(1)
2051 .nr(16)
2052 .kr(1)
2053 .sr(1)
2054 .m(1)
2055 .n(16)
2056 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002057 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002058 }
2059 }
2060
2061 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, k_div_4_subtile) {
2062 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2063 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002064 for (uint32_t n = 1; n <= 16; n++) {
2065 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002066 GemmMicrokernelTester()
2067 .mr(1)
2068 .nr(16)
2069 .kr(1)
2070 .sr(1)
2071 .m(m)
2072 .n(n)
2073 .k(k)
2074 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002075 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002076 }
2077 }
2078 }
2079 }
2080
2081 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_gt_16) {
2082 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2083 for (uint32_t n = 17; n < 32; n++) {
2084 for (size_t k = 1; k <= 20; k += 5) {
2085 GemmMicrokernelTester()
2086 .mr(1)
2087 .nr(16)
2088 .kr(1)
2089 .sr(1)
2090 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002091 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002092 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002093 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002094 }
2095 }
2096 }
2097
2098 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_gt_16_strided_cn) {
2099 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2100 for (uint32_t n = 17; n < 32; n++) {
2101 for (size_t k = 1; k <= 20; k += 5) {
2102 GemmMicrokernelTester()
2103 .mr(1)
2104 .nr(16)
2105 .kr(1)
2106 .sr(1)
2107 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002108 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002109 .k(k)
2110 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002111 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002112 }
2113 }
2114 }
2115
2116 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_gt_16_subtile) {
2117 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2118 for (uint32_t n = 17; n < 32; n++) {
2119 for (size_t k = 1; k <= 20; k += 5) {
2120 for (uint32_t m = 1; m <= 1; m++) {
2121 GemmMicrokernelTester()
2122 .mr(1)
2123 .nr(16)
2124 .kr(1)
2125 .sr(1)
2126 .m(m)
2127 .n(n)
2128 .k(k)
2129 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002130 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002131 }
2132 }
2133 }
2134 }
2135
2136 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_div_16) {
2137 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2138 for (uint32_t n = 32; n <= 48; n += 16) {
2139 for (size_t k = 1; k <= 20; k += 5) {
2140 GemmMicrokernelTester()
2141 .mr(1)
2142 .nr(16)
2143 .kr(1)
2144 .sr(1)
2145 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002146 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002147 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002148 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002149 }
2150 }
2151 }
2152
2153 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_div_16_strided_cn) {
2154 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2155 for (uint32_t n = 32; n <= 48; n += 16) {
2156 for (size_t k = 1; k <= 20; k += 5) {
2157 GemmMicrokernelTester()
2158 .mr(1)
2159 .nr(16)
2160 .kr(1)
2161 .sr(1)
2162 .m(1)
2163 .n(n)
2164 .k(k)
2165 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002166 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002167 }
2168 }
2169 }
2170
2171 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_div_16_subtile) {
2172 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2173 for (uint32_t n = 32; n <= 48; n += 16) {
2174 for (size_t k = 1; k <= 20; k += 5) {
2175 for (uint32_t m = 1; m <= 1; m++) {
2176 GemmMicrokernelTester()
2177 .mr(1)
2178 .nr(16)
2179 .kr(1)
2180 .sr(1)
2181 .m(m)
2182 .n(n)
2183 .k(k)
2184 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002185 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002186 }
2187 }
2188 }
2189 }
2190
2191 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, small_kernel) {
2192 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2193 for (size_t k = 1; k <= 20; k += 5) {
2194 GemmMicrokernelTester()
2195 .mr(1)
2196 .nr(16)
2197 .kr(1)
2198 .sr(1)
2199 .m(1)
2200 .n(16)
2201 .k(k)
2202 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002203 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002204 }
2205 }
2206
2207 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, small_kernel_subtile) {
2208 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2209 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002210 for (uint32_t n = 1; n <= 16; n++) {
2211 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002212 GemmMicrokernelTester()
2213 .mr(1)
2214 .nr(16)
2215 .kr(1)
2216 .sr(1)
2217 .m(m)
2218 .n(n)
2219 .k(k)
2220 .ks(3)
2221 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002222 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002223 }
2224 }
2225 }
2226 }
2227
2228 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_gt_16_small_kernel) {
2229 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2230 for (uint32_t n = 17; n < 32; n++) {
2231 for (size_t k = 1; k <= 20; k += 5) {
2232 GemmMicrokernelTester()
2233 .mr(1)
2234 .nr(16)
2235 .kr(1)
2236 .sr(1)
2237 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002238 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002239 .k(k)
2240 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002241 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002242 }
2243 }
2244 }
2245
2246 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, n_div_16_small_kernel) {
2247 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2248 for (uint32_t n = 32; n <= 48; n += 16) {
2249 for (size_t k = 1; k <= 20; k += 5) {
2250 GemmMicrokernelTester()
2251 .mr(1)
2252 .nr(16)
2253 .kr(1)
2254 .sr(1)
2255 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002256 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002257 .k(k)
2258 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002259 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002260 }
2261 }
2262 }
2263
2264 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, strided_cm_subtile) {
2265 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2266 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002267 for (uint32_t n = 1; n <= 16; n++) {
2268 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002269 GemmMicrokernelTester()
2270 .mr(1)
2271 .nr(16)
2272 .kr(1)
2273 .sr(1)
2274 .m(m)
2275 .n(n)
2276 .k(k)
2277 .cm_stride(19)
2278 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002279 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002280 }
2281 }
2282 }
2283 }
2284
2285 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, a_offset) {
2286 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2287 for (size_t k = 1; k <= 20; k += 5) {
2288 GemmMicrokernelTester()
2289 .mr(1)
2290 .nr(16)
2291 .kr(1)
2292 .sr(1)
2293 .m(1)
2294 .n(16)
2295 .k(k)
2296 .ks(3)
2297 .a_offset(23)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002298 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002299 }
2300 }
2301
2302 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, zero) {
2303 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002304 for (size_t k = 1; k <= 20; k += 5) {
2305 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002306 GemmMicrokernelTester()
2307 .mr(1)
2308 .nr(16)
2309 .kr(1)
2310 .sr(1)
2311 .m(1)
2312 .n(16)
2313 .k(k)
2314 .ks(3)
2315 .a_offset(23)
2316 .zero_index(mz)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002317 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002318 }
2319 }
2320 }
2321
2322 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, qmin) {
2323 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2324 GemmMicrokernelTester()
2325 .mr(1)
2326 .nr(16)
2327 .kr(1)
2328 .sr(1)
2329 .m(1)
2330 .n(16)
2331 .k(4)
2332 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002333 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002334 }
2335
2336 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, qmax) {
2337 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2338 GemmMicrokernelTester()
2339 .mr(1)
2340 .nr(16)
2341 .kr(1)
2342 .sr(1)
2343 .m(1)
2344 .n(16)
2345 .k(4)
2346 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002347 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002348 }
2349
2350 TEST(F16_IGEMM_MINMAX_1X16__NEONFP16ARITH_LD64, strided_cm) {
2351 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2352 GemmMicrokernelTester()
2353 .mr(1)
2354 .nr(16)
2355 .kr(1)
2356 .sr(1)
2357 .m(1)
2358 .n(16)
2359 .k(4)
2360 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002361 .Test(xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002362 }
2363#endif // XNN_ARCH_ARM64
2364
2365
2366#if XNN_ARCH_ARM64
2367 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_eq_4) {
2368 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2369 GemmMicrokernelTester()
2370 .mr(4)
2371 .nr(16)
2372 .kr(1)
2373 .sr(1)
2374 .m(4)
2375 .n(16)
2376 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002377 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002378 }
2379
2380 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, strided_cn) {
2381 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2382 GemmMicrokernelTester()
2383 .mr(4)
2384 .nr(16)
2385 .kr(1)
2386 .sr(1)
2387 .m(4)
2388 .n(16)
2389 .k(4)
2390 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002391 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002392 }
2393
2394 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_eq_4_subtile) {
2395 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002396 for (uint32_t n = 1; n <= 16; n++) {
2397 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002398 GemmMicrokernelTester()
2399 .mr(4)
2400 .nr(16)
2401 .kr(1)
2402 .sr(1)
2403 .m(m)
2404 .n(n)
2405 .k(4)
2406 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002407 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002408 }
2409 }
2410 }
2411
2412 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
2413 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2414 for (uint32_t m = 1; m <= 4; m++) {
2415 GemmMicrokernelTester()
2416 .mr(4)
2417 .nr(16)
2418 .kr(1)
2419 .sr(1)
2420 .m(m)
2421 .n(16)
2422 .k(4)
2423 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002424 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002425 }
2426 }
2427
2428 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
2429 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2430 for (uint32_t n = 1; n <= 16; n++) {
2431 GemmMicrokernelTester()
2432 .mr(4)
2433 .nr(16)
2434 .kr(1)
2435 .sr(1)
2436 .m(4)
2437 .n(n)
2438 .k(4)
2439 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002440 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002441 }
2442 }
2443
2444 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_lt_4) {
2445 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2446 for (size_t k = 1; k < 4; k++) {
2447 GemmMicrokernelTester()
2448 .mr(4)
2449 .nr(16)
2450 .kr(1)
2451 .sr(1)
2452 .m(4)
2453 .n(16)
2454 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002455 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002456 }
2457 }
2458
2459 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_lt_4_subtile) {
2460 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2461 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002462 for (uint32_t n = 1; n <= 16; n++) {
2463 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002464 GemmMicrokernelTester()
2465 .mr(4)
2466 .nr(16)
2467 .kr(1)
2468 .sr(1)
2469 .m(m)
2470 .n(n)
2471 .k(k)
2472 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002473 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002474 }
2475 }
2476 }
2477 }
2478
2479 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_gt_4) {
2480 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2481 for (size_t k = 5; k < 8; k++) {
2482 GemmMicrokernelTester()
2483 .mr(4)
2484 .nr(16)
2485 .kr(1)
2486 .sr(1)
2487 .m(4)
2488 .n(16)
2489 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002490 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002491 }
2492 }
2493
2494 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_gt_4_subtile) {
2495 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2496 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002497 for (uint32_t n = 1; n <= 16; n++) {
2498 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002499 GemmMicrokernelTester()
2500 .mr(4)
2501 .nr(16)
2502 .kr(1)
2503 .sr(1)
2504 .m(m)
2505 .n(n)
2506 .k(k)
2507 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002508 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002509 }
2510 }
2511 }
2512 }
2513
2514 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_div_4) {
2515 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2516 for (size_t k = 8; k <= 40; k += 4) {
2517 GemmMicrokernelTester()
2518 .mr(4)
2519 .nr(16)
2520 .kr(1)
2521 .sr(1)
2522 .m(4)
2523 .n(16)
2524 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002525 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002526 }
2527 }
2528
2529 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, k_div_4_subtile) {
2530 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2531 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002532 for (uint32_t n = 1; n <= 16; n++) {
2533 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002534 GemmMicrokernelTester()
2535 .mr(4)
2536 .nr(16)
2537 .kr(1)
2538 .sr(1)
2539 .m(m)
2540 .n(n)
2541 .k(k)
2542 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002543 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002544 }
2545 }
2546 }
2547 }
2548
2549 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_gt_16) {
2550 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2551 for (uint32_t n = 17; n < 32; n++) {
2552 for (size_t k = 1; k <= 20; k += 5) {
2553 GemmMicrokernelTester()
2554 .mr(4)
2555 .nr(16)
2556 .kr(1)
2557 .sr(1)
2558 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002559 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002560 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002561 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002562 }
2563 }
2564 }
2565
2566 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_gt_16_strided_cn) {
2567 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2568 for (uint32_t n = 17; n < 32; n++) {
2569 for (size_t k = 1; k <= 20; k += 5) {
2570 GemmMicrokernelTester()
2571 .mr(4)
2572 .nr(16)
2573 .kr(1)
2574 .sr(1)
2575 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002576 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002577 .k(k)
2578 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002579 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002580 }
2581 }
2582 }
2583
2584 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_gt_16_subtile) {
2585 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2586 for (uint32_t n = 17; n < 32; n++) {
2587 for (size_t k = 1; k <= 20; k += 5) {
2588 for (uint32_t m = 1; m <= 4; m++) {
2589 GemmMicrokernelTester()
2590 .mr(4)
2591 .nr(16)
2592 .kr(1)
2593 .sr(1)
2594 .m(m)
2595 .n(n)
2596 .k(k)
2597 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002598 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002599 }
2600 }
2601 }
2602 }
2603
2604 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_div_16) {
2605 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2606 for (uint32_t n = 32; n <= 48; n += 16) {
2607 for (size_t k = 1; k <= 20; k += 5) {
2608 GemmMicrokernelTester()
2609 .mr(4)
2610 .nr(16)
2611 .kr(1)
2612 .sr(1)
2613 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002614 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002615 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002616 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002617 }
2618 }
2619 }
2620
2621 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_div_16_strided_cn) {
2622 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2623 for (uint32_t n = 32; n <= 48; n += 16) {
2624 for (size_t k = 1; k <= 20; k += 5) {
2625 GemmMicrokernelTester()
2626 .mr(4)
2627 .nr(16)
2628 .kr(1)
2629 .sr(1)
2630 .m(4)
2631 .n(n)
2632 .k(k)
2633 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002634 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002635 }
2636 }
2637 }
2638
2639 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_div_16_subtile) {
2640 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2641 for (uint32_t n = 32; n <= 48; n += 16) {
2642 for (size_t k = 1; k <= 20; k += 5) {
2643 for (uint32_t m = 1; m <= 4; m++) {
2644 GemmMicrokernelTester()
2645 .mr(4)
2646 .nr(16)
2647 .kr(1)
2648 .sr(1)
2649 .m(m)
2650 .n(n)
2651 .k(k)
2652 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002653 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002654 }
2655 }
2656 }
2657 }
2658
2659 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, small_kernel) {
2660 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2661 for (size_t k = 1; k <= 20; k += 5) {
2662 GemmMicrokernelTester()
2663 .mr(4)
2664 .nr(16)
2665 .kr(1)
2666 .sr(1)
2667 .m(4)
2668 .n(16)
2669 .k(k)
2670 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002671 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002672 }
2673 }
2674
2675 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, small_kernel_subtile) {
2676 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2677 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002678 for (uint32_t n = 1; n <= 16; n++) {
2679 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002680 GemmMicrokernelTester()
2681 .mr(4)
2682 .nr(16)
2683 .kr(1)
2684 .sr(1)
2685 .m(m)
2686 .n(n)
2687 .k(k)
2688 .ks(3)
2689 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002690 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002691 }
2692 }
2693 }
2694 }
2695
2696 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_gt_16_small_kernel) {
2697 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2698 for (uint32_t n = 17; n < 32; n++) {
2699 for (size_t k = 1; k <= 20; k += 5) {
2700 GemmMicrokernelTester()
2701 .mr(4)
2702 .nr(16)
2703 .kr(1)
2704 .sr(1)
2705 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002706 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002707 .k(k)
2708 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002709 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002710 }
2711 }
2712 }
2713
2714 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, n_div_16_small_kernel) {
2715 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2716 for (uint32_t n = 32; n <= 48; n += 16) {
2717 for (size_t k = 1; k <= 20; k += 5) {
2718 GemmMicrokernelTester()
2719 .mr(4)
2720 .nr(16)
2721 .kr(1)
2722 .sr(1)
2723 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002724 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002725 .k(k)
2726 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002727 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002728 }
2729 }
2730 }
2731
2732 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, strided_cm_subtile) {
2733 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2734 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002735 for (uint32_t n = 1; n <= 16; n++) {
2736 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002737 GemmMicrokernelTester()
2738 .mr(4)
2739 .nr(16)
2740 .kr(1)
2741 .sr(1)
2742 .m(m)
2743 .n(n)
2744 .k(k)
2745 .cm_stride(19)
2746 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002747 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002748 }
2749 }
2750 }
2751 }
2752
2753 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, a_offset) {
2754 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2755 for (size_t k = 1; k <= 20; k += 5) {
2756 GemmMicrokernelTester()
2757 .mr(4)
2758 .nr(16)
2759 .kr(1)
2760 .sr(1)
2761 .m(4)
2762 .n(16)
2763 .k(k)
2764 .ks(3)
2765 .a_offset(83)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002766 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002767 }
2768 }
2769
2770 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, zero) {
2771 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002772 for (size_t k = 1; k <= 20; k += 5) {
2773 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002774 GemmMicrokernelTester()
2775 .mr(4)
2776 .nr(16)
2777 .kr(1)
2778 .sr(1)
2779 .m(4)
2780 .n(16)
2781 .k(k)
2782 .ks(3)
2783 .a_offset(83)
2784 .zero_index(mz)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002785 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002786 }
2787 }
2788 }
2789
2790 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, qmin) {
2791 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2792 GemmMicrokernelTester()
2793 .mr(4)
2794 .nr(16)
2795 .kr(1)
2796 .sr(1)
2797 .m(4)
2798 .n(16)
2799 .k(4)
2800 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002801 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002802 }
2803
2804 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, qmax) {
2805 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2806 GemmMicrokernelTester()
2807 .mr(4)
2808 .nr(16)
2809 .kr(1)
2810 .sr(1)
2811 .m(4)
2812 .n(16)
2813 .k(4)
2814 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002815 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002816 }
2817
2818 TEST(F16_IGEMM_MINMAX_4X16__NEONFP16ARITH_LD64, strided_cm) {
2819 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2820 GemmMicrokernelTester()
2821 .mr(4)
2822 .nr(16)
2823 .kr(1)
2824 .sr(1)
2825 .m(4)
2826 .n(16)
2827 .k(4)
2828 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002829 .Test(xnn_f16_igemm_minmax_ukernel_4x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002830 }
2831#endif // XNN_ARCH_ARM64
2832
2833
2834#if XNN_ARCH_ARM64
2835 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_eq_4) {
2836 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2837 GemmMicrokernelTester()
2838 .mr(6)
2839 .nr(16)
2840 .kr(1)
2841 .sr(1)
2842 .m(6)
2843 .n(16)
2844 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002845 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002846 }
2847
2848 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, strided_cn) {
2849 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2850 GemmMicrokernelTester()
2851 .mr(6)
2852 .nr(16)
2853 .kr(1)
2854 .sr(1)
2855 .m(6)
2856 .n(16)
2857 .k(4)
2858 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002859 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002860 }
2861
2862 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_eq_4_subtile) {
2863 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002864 for (uint32_t n = 1; n <= 16; n++) {
2865 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002866 GemmMicrokernelTester()
2867 .mr(6)
2868 .nr(16)
2869 .kr(1)
2870 .sr(1)
2871 .m(m)
2872 .n(n)
2873 .k(4)
2874 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002875 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002876 }
2877 }
2878 }
2879
2880 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
2881 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2882 for (uint32_t m = 1; m <= 6; m++) {
2883 GemmMicrokernelTester()
2884 .mr(6)
2885 .nr(16)
2886 .kr(1)
2887 .sr(1)
2888 .m(m)
2889 .n(16)
2890 .k(4)
2891 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002892 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002893 }
2894 }
2895
2896 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
2897 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2898 for (uint32_t n = 1; n <= 16; n++) {
2899 GemmMicrokernelTester()
2900 .mr(6)
2901 .nr(16)
2902 .kr(1)
2903 .sr(1)
2904 .m(6)
2905 .n(n)
2906 .k(4)
2907 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002908 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002909 }
2910 }
2911
2912 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_lt_4) {
2913 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2914 for (size_t k = 1; k < 4; k++) {
2915 GemmMicrokernelTester()
2916 .mr(6)
2917 .nr(16)
2918 .kr(1)
2919 .sr(1)
2920 .m(6)
2921 .n(16)
2922 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002923 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002924 }
2925 }
2926
2927 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_lt_4_subtile) {
2928 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2929 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002930 for (uint32_t n = 1; n <= 16; n++) {
2931 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002932 GemmMicrokernelTester()
2933 .mr(6)
2934 .nr(16)
2935 .kr(1)
2936 .sr(1)
2937 .m(m)
2938 .n(n)
2939 .k(k)
2940 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002941 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002942 }
2943 }
2944 }
2945 }
2946
2947 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_gt_4) {
2948 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2949 for (size_t k = 5; k < 8; k++) {
2950 GemmMicrokernelTester()
2951 .mr(6)
2952 .nr(16)
2953 .kr(1)
2954 .sr(1)
2955 .m(6)
2956 .n(16)
2957 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002958 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002959 }
2960 }
2961
2962 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_gt_4_subtile) {
2963 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2964 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002965 for (uint32_t n = 1; n <= 16; n++) {
2966 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002967 GemmMicrokernelTester()
2968 .mr(6)
2969 .nr(16)
2970 .kr(1)
2971 .sr(1)
2972 .m(m)
2973 .n(n)
2974 .k(k)
2975 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002976 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002977 }
2978 }
2979 }
2980 }
2981
2982 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_div_4) {
2983 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2984 for (size_t k = 8; k <= 40; k += 4) {
2985 GemmMicrokernelTester()
2986 .mr(6)
2987 .nr(16)
2988 .kr(1)
2989 .sr(1)
2990 .m(6)
2991 .n(16)
2992 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08002993 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07002994 }
2995 }
2996
2997 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, k_div_4_subtile) {
2998 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
2999 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003000 for (uint32_t n = 1; n <= 16; n++) {
3001 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003002 GemmMicrokernelTester()
3003 .mr(6)
3004 .nr(16)
3005 .kr(1)
3006 .sr(1)
3007 .m(m)
3008 .n(n)
3009 .k(k)
3010 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003011 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003012 }
3013 }
3014 }
3015 }
3016
3017 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_gt_16) {
3018 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3019 for (uint32_t n = 17; n < 32; n++) {
3020 for (size_t k = 1; k <= 20; k += 5) {
3021 GemmMicrokernelTester()
3022 .mr(6)
3023 .nr(16)
3024 .kr(1)
3025 .sr(1)
3026 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003027 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003028 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003029 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003030 }
3031 }
3032 }
3033
3034 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_gt_16_strided_cn) {
3035 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3036 for (uint32_t n = 17; n < 32; n++) {
3037 for (size_t k = 1; k <= 20; k += 5) {
3038 GemmMicrokernelTester()
3039 .mr(6)
3040 .nr(16)
3041 .kr(1)
3042 .sr(1)
3043 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003044 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003045 .k(k)
3046 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003047 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003048 }
3049 }
3050 }
3051
3052 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_gt_16_subtile) {
3053 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3054 for (uint32_t n = 17; n < 32; n++) {
3055 for (size_t k = 1; k <= 20; k += 5) {
3056 for (uint32_t m = 1; m <= 6; m++) {
3057 GemmMicrokernelTester()
3058 .mr(6)
3059 .nr(16)
3060 .kr(1)
3061 .sr(1)
3062 .m(m)
3063 .n(n)
3064 .k(k)
3065 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003066 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003067 }
3068 }
3069 }
3070 }
3071
3072 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_div_16) {
3073 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3074 for (uint32_t n = 32; n <= 48; n += 16) {
3075 for (size_t k = 1; k <= 20; k += 5) {
3076 GemmMicrokernelTester()
3077 .mr(6)
3078 .nr(16)
3079 .kr(1)
3080 .sr(1)
3081 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003082 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003083 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003084 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003085 }
3086 }
3087 }
3088
3089 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_div_16_strided_cn) {
3090 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3091 for (uint32_t n = 32; n <= 48; n += 16) {
3092 for (size_t k = 1; k <= 20; k += 5) {
3093 GemmMicrokernelTester()
3094 .mr(6)
3095 .nr(16)
3096 .kr(1)
3097 .sr(1)
3098 .m(6)
3099 .n(n)
3100 .k(k)
3101 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003102 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003103 }
3104 }
3105 }
3106
3107 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_div_16_subtile) {
3108 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3109 for (uint32_t n = 32; n <= 48; n += 16) {
3110 for (size_t k = 1; k <= 20; k += 5) {
3111 for (uint32_t m = 1; m <= 6; m++) {
3112 GemmMicrokernelTester()
3113 .mr(6)
3114 .nr(16)
3115 .kr(1)
3116 .sr(1)
3117 .m(m)
3118 .n(n)
3119 .k(k)
3120 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003121 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003122 }
3123 }
3124 }
3125 }
3126
3127 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, small_kernel) {
3128 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3129 for (size_t k = 1; k <= 20; k += 5) {
3130 GemmMicrokernelTester()
3131 .mr(6)
3132 .nr(16)
3133 .kr(1)
3134 .sr(1)
3135 .m(6)
3136 .n(16)
3137 .k(k)
3138 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003139 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003140 }
3141 }
3142
3143 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, small_kernel_subtile) {
3144 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3145 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003146 for (uint32_t n = 1; n <= 16; n++) {
3147 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003148 GemmMicrokernelTester()
3149 .mr(6)
3150 .nr(16)
3151 .kr(1)
3152 .sr(1)
3153 .m(m)
3154 .n(n)
3155 .k(k)
3156 .ks(3)
3157 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003158 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003159 }
3160 }
3161 }
3162 }
3163
3164 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_gt_16_small_kernel) {
3165 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3166 for (uint32_t n = 17; n < 32; n++) {
3167 for (size_t k = 1; k <= 20; k += 5) {
3168 GemmMicrokernelTester()
3169 .mr(6)
3170 .nr(16)
3171 .kr(1)
3172 .sr(1)
3173 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003174 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003175 .k(k)
3176 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003177 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003178 }
3179 }
3180 }
3181
3182 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, n_div_16_small_kernel) {
3183 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3184 for (uint32_t n = 32; n <= 48; n += 16) {
3185 for (size_t k = 1; k <= 20; k += 5) {
3186 GemmMicrokernelTester()
3187 .mr(6)
3188 .nr(16)
3189 .kr(1)
3190 .sr(1)
3191 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003192 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003193 .k(k)
3194 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003195 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003196 }
3197 }
3198 }
3199
3200 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, strided_cm_subtile) {
3201 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3202 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003203 for (uint32_t n = 1; n <= 16; n++) {
3204 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003205 GemmMicrokernelTester()
3206 .mr(6)
3207 .nr(16)
3208 .kr(1)
3209 .sr(1)
3210 .m(m)
3211 .n(n)
3212 .k(k)
3213 .cm_stride(19)
3214 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003215 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003216 }
3217 }
3218 }
3219 }
3220
3221 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, a_offset) {
3222 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3223 for (size_t k = 1; k <= 20; k += 5) {
3224 GemmMicrokernelTester()
3225 .mr(6)
3226 .nr(16)
3227 .kr(1)
3228 .sr(1)
3229 .m(6)
3230 .n(16)
3231 .k(k)
3232 .ks(3)
3233 .a_offset(127)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003234 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003235 }
3236 }
3237
3238 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, zero) {
3239 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003240 for (size_t k = 1; k <= 20; k += 5) {
3241 for (uint32_t mz = 0; mz < 6; mz++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003242 GemmMicrokernelTester()
3243 .mr(6)
3244 .nr(16)
3245 .kr(1)
3246 .sr(1)
3247 .m(6)
3248 .n(16)
3249 .k(k)
3250 .ks(3)
3251 .a_offset(127)
3252 .zero_index(mz)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003253 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003254 }
3255 }
3256 }
3257
3258 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, qmin) {
3259 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3260 GemmMicrokernelTester()
3261 .mr(6)
3262 .nr(16)
3263 .kr(1)
3264 .sr(1)
3265 .m(6)
3266 .n(16)
3267 .k(4)
3268 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003269 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003270 }
3271
3272 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, qmax) {
3273 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3274 GemmMicrokernelTester()
3275 .mr(6)
3276 .nr(16)
3277 .kr(1)
3278 .sr(1)
3279 .m(6)
3280 .n(16)
3281 .k(4)
3282 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003283 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003284 }
3285
3286 TEST(F16_IGEMM_MINMAX_6X16__NEONFP16ARITH_LD64, strided_cm) {
3287 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3288 GemmMicrokernelTester()
3289 .mr(6)
3290 .nr(16)
3291 .kr(1)
3292 .sr(1)
3293 .m(6)
3294 .n(16)
3295 .k(4)
3296 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003297 .Test(xnn_f16_igemm_minmax_ukernel_6x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003298 }
3299#endif // XNN_ARCH_ARM64
3300
3301
3302#if XNN_ARCH_ARM64
3303 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_eq_4) {
3304 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3305 GemmMicrokernelTester()
3306 .mr(8)
3307 .nr(16)
3308 .kr(1)
3309 .sr(1)
3310 .m(8)
3311 .n(16)
3312 .k(4)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003313 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003314 }
3315
3316 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, strided_cn) {
3317 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3318 GemmMicrokernelTester()
3319 .mr(8)
3320 .nr(16)
3321 .kr(1)
3322 .sr(1)
3323 .m(8)
3324 .n(16)
3325 .k(4)
3326 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003327 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003328 }
3329
3330 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_eq_4_subtile) {
3331 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003332 for (uint32_t n = 1; n <= 16; n++) {
3333 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003334 GemmMicrokernelTester()
3335 .mr(8)
3336 .nr(16)
3337 .kr(1)
3338 .sr(1)
3339 .m(m)
3340 .n(n)
3341 .k(4)
3342 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003343 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003344 }
3345 }
3346 }
3347
3348 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_eq_4_subtile_m) {
3349 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3350 for (uint32_t m = 1; m <= 8; m++) {
3351 GemmMicrokernelTester()
3352 .mr(8)
3353 .nr(16)
3354 .kr(1)
3355 .sr(1)
3356 .m(m)
3357 .n(16)
3358 .k(4)
3359 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003360 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003361 }
3362 }
3363
3364 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_eq_4_subtile_n) {
3365 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3366 for (uint32_t n = 1; n <= 16; n++) {
3367 GemmMicrokernelTester()
3368 .mr(8)
3369 .nr(16)
3370 .kr(1)
3371 .sr(1)
3372 .m(8)
3373 .n(n)
3374 .k(4)
3375 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003376 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003377 }
3378 }
3379
3380 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_lt_4) {
3381 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3382 for (size_t k = 1; k < 4; k++) {
3383 GemmMicrokernelTester()
3384 .mr(8)
3385 .nr(16)
3386 .kr(1)
3387 .sr(1)
3388 .m(8)
3389 .n(16)
3390 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003391 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003392 }
3393 }
3394
3395 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_lt_4_subtile) {
3396 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3397 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003398 for (uint32_t n = 1; n <= 16; n++) {
3399 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003400 GemmMicrokernelTester()
3401 .mr(8)
3402 .nr(16)
3403 .kr(1)
3404 .sr(1)
3405 .m(m)
3406 .n(n)
3407 .k(k)
3408 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003409 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003410 }
3411 }
3412 }
3413 }
3414
3415 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_gt_4) {
3416 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3417 for (size_t k = 5; k < 8; k++) {
3418 GemmMicrokernelTester()
3419 .mr(8)
3420 .nr(16)
3421 .kr(1)
3422 .sr(1)
3423 .m(8)
3424 .n(16)
3425 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003426 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003427 }
3428 }
3429
3430 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_gt_4_subtile) {
3431 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3432 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003433 for (uint32_t n = 1; n <= 16; n++) {
3434 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003435 GemmMicrokernelTester()
3436 .mr(8)
3437 .nr(16)
3438 .kr(1)
3439 .sr(1)
3440 .m(m)
3441 .n(n)
3442 .k(k)
3443 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003444 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003445 }
3446 }
3447 }
3448 }
3449
3450 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_div_4) {
3451 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3452 for (size_t k = 8; k <= 40; k += 4) {
3453 GemmMicrokernelTester()
3454 .mr(8)
3455 .nr(16)
3456 .kr(1)
3457 .sr(1)
3458 .m(8)
3459 .n(16)
3460 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003461 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003462 }
3463 }
3464
3465 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, k_div_4_subtile) {
3466 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3467 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003468 for (uint32_t n = 1; n <= 16; n++) {
3469 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003470 GemmMicrokernelTester()
3471 .mr(8)
3472 .nr(16)
3473 .kr(1)
3474 .sr(1)
3475 .m(m)
3476 .n(n)
3477 .k(k)
3478 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003479 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003480 }
3481 }
3482 }
3483 }
3484
3485 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_gt_16) {
3486 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3487 for (uint32_t n = 17; n < 32; n++) {
3488 for (size_t k = 1; k <= 20; k += 5) {
3489 GemmMicrokernelTester()
3490 .mr(8)
3491 .nr(16)
3492 .kr(1)
3493 .sr(1)
3494 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003495 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003496 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003497 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003498 }
3499 }
3500 }
3501
3502 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_gt_16_strided_cn) {
3503 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3504 for (uint32_t n = 17; n < 32; n++) {
3505 for (size_t k = 1; k <= 20; k += 5) {
3506 GemmMicrokernelTester()
3507 .mr(8)
3508 .nr(16)
3509 .kr(1)
3510 .sr(1)
3511 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003512 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003513 .k(k)
3514 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003515 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003516 }
3517 }
3518 }
3519
3520 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_gt_16_subtile) {
3521 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3522 for (uint32_t n = 17; n < 32; n++) {
3523 for (size_t k = 1; k <= 20; k += 5) {
3524 for (uint32_t m = 1; m <= 8; m++) {
3525 GemmMicrokernelTester()
3526 .mr(8)
3527 .nr(16)
3528 .kr(1)
3529 .sr(1)
3530 .m(m)
3531 .n(n)
3532 .k(k)
3533 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003534 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003535 }
3536 }
3537 }
3538 }
3539
3540 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_div_16) {
3541 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3542 for (uint32_t n = 32; n <= 48; n += 16) {
3543 for (size_t k = 1; k <= 20; k += 5) {
3544 GemmMicrokernelTester()
3545 .mr(8)
3546 .nr(16)
3547 .kr(1)
3548 .sr(1)
3549 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003550 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003551 .k(k)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003552 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003553 }
3554 }
3555 }
3556
3557 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_div_16_strided_cn) {
3558 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3559 for (uint32_t n = 32; n <= 48; n += 16) {
3560 for (size_t k = 1; k <= 20; k += 5) {
3561 GemmMicrokernelTester()
3562 .mr(8)
3563 .nr(16)
3564 .kr(1)
3565 .sr(1)
3566 .m(8)
3567 .n(n)
3568 .k(k)
3569 .cn_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003570 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003571 }
3572 }
3573 }
3574
3575 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_div_16_subtile) {
3576 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3577 for (uint32_t n = 32; n <= 48; n += 16) {
3578 for (size_t k = 1; k <= 20; k += 5) {
3579 for (uint32_t m = 1; m <= 8; m++) {
3580 GemmMicrokernelTester()
3581 .mr(8)
3582 .nr(16)
3583 .kr(1)
3584 .sr(1)
3585 .m(m)
3586 .n(n)
3587 .k(k)
3588 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003589 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003590 }
3591 }
3592 }
3593 }
3594
3595 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, small_kernel) {
3596 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3597 for (size_t k = 1; k <= 20; k += 5) {
3598 GemmMicrokernelTester()
3599 .mr(8)
3600 .nr(16)
3601 .kr(1)
3602 .sr(1)
3603 .m(8)
3604 .n(16)
3605 .k(k)
3606 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003607 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003608 }
3609 }
3610
3611 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, small_kernel_subtile) {
3612 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3613 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003614 for (uint32_t n = 1; n <= 16; n++) {
3615 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003616 GemmMicrokernelTester()
3617 .mr(8)
3618 .nr(16)
3619 .kr(1)
3620 .sr(1)
3621 .m(m)
3622 .n(n)
3623 .k(k)
3624 .ks(3)
3625 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003626 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003627 }
3628 }
3629 }
3630 }
3631
3632 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_gt_16_small_kernel) {
3633 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3634 for (uint32_t n = 17; n < 32; n++) {
3635 for (size_t k = 1; k <= 20; k += 5) {
3636 GemmMicrokernelTester()
3637 .mr(8)
3638 .nr(16)
3639 .kr(1)
3640 .sr(1)
3641 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003642 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003643 .k(k)
3644 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003645 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003646 }
3647 }
3648 }
3649
3650 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, n_div_16_small_kernel) {
3651 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3652 for (uint32_t n = 32; n <= 48; n += 16) {
3653 for (size_t k = 1; k <= 20; k += 5) {
3654 GemmMicrokernelTester()
3655 .mr(8)
3656 .nr(16)
3657 .kr(1)
3658 .sr(1)
3659 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003660 .n(n)
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003661 .k(k)
3662 .ks(3)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003663 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003664 }
3665 }
3666 }
3667
3668 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, strided_cm_subtile) {
3669 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3670 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003671 for (uint32_t n = 1; n <= 16; n++) {
3672 for (uint32_t m = 1; m <= 8; m++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003673 GemmMicrokernelTester()
3674 .mr(8)
3675 .nr(16)
3676 .kr(1)
3677 .sr(1)
3678 .m(m)
3679 .n(n)
3680 .k(k)
3681 .cm_stride(19)
3682 .iterations(1)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003683 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003684 }
3685 }
3686 }
3687 }
3688
3689 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, a_offset) {
3690 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3691 for (size_t k = 1; k <= 20; k += 5) {
3692 GemmMicrokernelTester()
3693 .mr(8)
3694 .nr(16)
3695 .kr(1)
3696 .sr(1)
3697 .m(8)
3698 .n(16)
3699 .k(k)
3700 .ks(3)
3701 .a_offset(163)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003702 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003703 }
3704 }
3705
3706 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, zero) {
3707 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003708 for (size_t k = 1; k <= 20; k += 5) {
3709 for (uint32_t mz = 0; mz < 8; mz++) {
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003710 GemmMicrokernelTester()
3711 .mr(8)
3712 .nr(16)
3713 .kr(1)
3714 .sr(1)
3715 .m(8)
3716 .n(16)
3717 .k(k)
3718 .ks(3)
3719 .a_offset(163)
3720 .zero_index(mz)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003721 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003722 }
3723 }
3724 }
3725
3726 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, qmin) {
3727 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3728 GemmMicrokernelTester()
3729 .mr(8)
3730 .nr(16)
3731 .kr(1)
3732 .sr(1)
3733 .m(8)
3734 .n(16)
3735 .k(4)
3736 .qmin(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003737 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003738 }
3739
3740 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, qmax) {
3741 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3742 GemmMicrokernelTester()
3743 .mr(8)
3744 .nr(16)
3745 .kr(1)
3746 .sr(1)
3747 .m(8)
3748 .n(16)
3749 .k(4)
3750 .qmax(128)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003751 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003752 }
3753
3754 TEST(F16_IGEMM_MINMAX_8X16__NEONFP16ARITH_LD64, strided_cm) {
3755 TEST_REQUIRES_ARM_NEON_FP16_ARITH;
3756 GemmMicrokernelTester()
3757 .mr(8)
3758 .nr(16)
3759 .kr(1)
3760 .sr(1)
3761 .m(8)
3762 .n(16)
3763 .k(4)
3764 .cm_stride(19)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003765 .Test(xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64, xnn_init_f16_scaleminmax_neon_params);
Frank Barchard3f9f99f2020-05-06 01:12:04 -07003766 }
3767#endif // XNN_ARCH_ARM64
Marat Dukhanc4302c22022-01-06 19:27:03 -08003768
3769
3770#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3771 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, k_eq_1) {
3772 TEST_REQUIRES_X86_AVX2;
3773 GemmMicrokernelTester()
3774 .mr(1)
3775 .nr(8)
3776 .kr(1)
3777 .sr(1)
3778 .m(1)
3779 .n(8)
3780 .k(1)
3781 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3782 }
3783
3784 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, strided_cn) {
3785 TEST_REQUIRES_X86_AVX2;
3786 GemmMicrokernelTester()
3787 .mr(1)
3788 .nr(8)
3789 .kr(1)
3790 .sr(1)
3791 .m(1)
3792 .n(8)
3793 .k(1)
3794 .cn_stride(11)
3795 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3796 }
3797
3798 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, k_eq_1_subtile) {
3799 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003800 for (uint32_t n = 1; n <= 8; n++) {
3801 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08003802 GemmMicrokernelTester()
3803 .mr(1)
3804 .nr(8)
3805 .kr(1)
3806 .sr(1)
3807 .m(m)
3808 .n(n)
3809 .k(1)
3810 .iterations(1)
3811 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3812 }
3813 }
3814 }
3815
3816 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, k_eq_1_subtile_m) {
3817 TEST_REQUIRES_X86_AVX2;
3818 for (uint32_t m = 1; m <= 1; m++) {
3819 GemmMicrokernelTester()
3820 .mr(1)
3821 .nr(8)
3822 .kr(1)
3823 .sr(1)
3824 .m(m)
3825 .n(8)
3826 .k(1)
3827 .iterations(1)
3828 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3829 }
3830 }
3831
3832 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, k_eq_1_subtile_n) {
3833 TEST_REQUIRES_X86_AVX2;
3834 for (uint32_t n = 1; n <= 8; n++) {
3835 GemmMicrokernelTester()
3836 .mr(1)
3837 .nr(8)
3838 .kr(1)
3839 .sr(1)
3840 .m(1)
3841 .n(n)
3842 .k(1)
3843 .iterations(1)
3844 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3845 }
3846 }
3847
3848 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, k_gt_1) {
3849 TEST_REQUIRES_X86_AVX2;
3850 for (size_t k = 2; k < 10; k++) {
3851 GemmMicrokernelTester()
3852 .mr(1)
3853 .nr(8)
3854 .kr(1)
3855 .sr(1)
3856 .m(1)
3857 .n(8)
3858 .k(k)
3859 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3860 }
3861 }
3862
3863 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, k_gt_1_subtile) {
3864 TEST_REQUIRES_X86_AVX2;
3865 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003866 for (uint32_t n = 1; n <= 8; n++) {
3867 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08003868 GemmMicrokernelTester()
3869 .mr(1)
3870 .nr(8)
3871 .kr(1)
3872 .sr(1)
3873 .m(m)
3874 .n(n)
3875 .k(k)
3876 .iterations(1)
3877 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3878 }
3879 }
3880 }
3881 }
3882
3883 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, n_gt_8) {
3884 TEST_REQUIRES_X86_AVX2;
3885 for (uint32_t n = 9; n < 16; n++) {
3886 for (size_t k = 1; k <= 5; k += 2) {
3887 GemmMicrokernelTester()
3888 .mr(1)
3889 .nr(8)
3890 .kr(1)
3891 .sr(1)
3892 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003893 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003894 .k(k)
3895 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3896 }
3897 }
3898 }
3899
3900 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, n_gt_8_strided_cn) {
3901 TEST_REQUIRES_X86_AVX2;
3902 for (uint32_t n = 9; n < 16; n++) {
3903 for (size_t k = 1; k <= 5; k += 2) {
3904 GemmMicrokernelTester()
3905 .mr(1)
3906 .nr(8)
3907 .kr(1)
3908 .sr(1)
3909 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003910 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003911 .k(k)
3912 .cn_stride(11)
3913 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3914 }
3915 }
3916 }
3917
3918 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, n_gt_8_subtile) {
3919 TEST_REQUIRES_X86_AVX2;
3920 for (uint32_t n = 9; n < 16; n++) {
3921 for (size_t k = 1; k <= 5; k += 2) {
3922 for (uint32_t m = 1; m <= 1; m++) {
3923 GemmMicrokernelTester()
3924 .mr(1)
3925 .nr(8)
3926 .kr(1)
3927 .sr(1)
3928 .m(m)
3929 .n(n)
3930 .k(k)
3931 .iterations(1)
3932 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3933 }
3934 }
3935 }
3936 }
3937
3938 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, n_div_8) {
3939 TEST_REQUIRES_X86_AVX2;
3940 for (uint32_t n = 16; n <= 24; n += 8) {
3941 for (size_t k = 1; k <= 5; k += 2) {
3942 GemmMicrokernelTester()
3943 .mr(1)
3944 .nr(8)
3945 .kr(1)
3946 .sr(1)
3947 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003948 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08003949 .k(k)
3950 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3951 }
3952 }
3953 }
3954
3955 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, n_div_8_strided_cn) {
3956 TEST_REQUIRES_X86_AVX2;
3957 for (uint32_t n = 16; n <= 24; n += 8) {
3958 for (size_t k = 1; k <= 5; k += 2) {
3959 GemmMicrokernelTester()
3960 .mr(1)
3961 .nr(8)
3962 .kr(1)
3963 .sr(1)
3964 .m(1)
3965 .n(n)
3966 .k(k)
3967 .cn_stride(11)
3968 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3969 }
3970 }
3971 }
3972
3973 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, n_div_8_subtile) {
3974 TEST_REQUIRES_X86_AVX2;
3975 for (uint32_t n = 16; n <= 24; n += 8) {
3976 for (size_t k = 1; k <= 5; k += 2) {
3977 for (uint32_t m = 1; m <= 1; m++) {
3978 GemmMicrokernelTester()
3979 .mr(1)
3980 .nr(8)
3981 .kr(1)
3982 .sr(1)
3983 .m(m)
3984 .n(n)
3985 .k(k)
3986 .iterations(1)
3987 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
3988 }
3989 }
3990 }
3991 }
3992
3993 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, small_kernel) {
3994 TEST_REQUIRES_X86_AVX2;
3995 for (size_t k = 1; k <= 5; k += 2) {
3996 GemmMicrokernelTester()
3997 .mr(1)
3998 .nr(8)
3999 .kr(1)
4000 .sr(1)
4001 .m(1)
4002 .n(8)
4003 .k(k)
4004 .ks(3)
4005 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4006 }
4007 }
4008
4009 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, small_kernel_subtile) {
4010 TEST_REQUIRES_X86_AVX2;
4011 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004012 for (uint32_t n = 1; n <= 8; n++) {
4013 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004014 GemmMicrokernelTester()
4015 .mr(1)
4016 .nr(8)
4017 .kr(1)
4018 .sr(1)
4019 .m(m)
4020 .n(n)
4021 .k(k)
4022 .ks(3)
4023 .iterations(1)
4024 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4025 }
4026 }
4027 }
4028 }
4029
4030 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, n_gt_8_small_kernel) {
4031 TEST_REQUIRES_X86_AVX2;
4032 for (uint32_t n = 9; n < 16; n++) {
4033 for (size_t k = 1; k <= 5; k += 2) {
4034 GemmMicrokernelTester()
4035 .mr(1)
4036 .nr(8)
4037 .kr(1)
4038 .sr(1)
4039 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004040 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004041 .k(k)
4042 .ks(3)
4043 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4044 }
4045 }
4046 }
4047
4048 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, n_div_8_small_kernel) {
4049 TEST_REQUIRES_X86_AVX2;
4050 for (uint32_t n = 16; n <= 24; n += 8) {
4051 for (size_t k = 1; k <= 5; k += 2) {
4052 GemmMicrokernelTester()
4053 .mr(1)
4054 .nr(8)
4055 .kr(1)
4056 .sr(1)
4057 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004058 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004059 .k(k)
4060 .ks(3)
4061 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4062 }
4063 }
4064 }
4065
4066 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, strided_cm_subtile) {
4067 TEST_REQUIRES_X86_AVX2;
4068 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004069 for (uint32_t n = 1; n <= 8; n++) {
4070 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004071 GemmMicrokernelTester()
4072 .mr(1)
4073 .nr(8)
4074 .kr(1)
4075 .sr(1)
4076 .m(m)
4077 .n(n)
4078 .k(k)
4079 .cm_stride(11)
4080 .iterations(1)
4081 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4082 }
4083 }
4084 }
4085 }
4086
4087 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, a_offset) {
4088 TEST_REQUIRES_X86_AVX2;
4089 for (size_t k = 1; k <= 5; k += 2) {
4090 GemmMicrokernelTester()
4091 .mr(1)
4092 .nr(8)
4093 .kr(1)
4094 .sr(1)
4095 .m(1)
4096 .n(8)
4097 .k(k)
4098 .ks(3)
4099 .a_offset(7)
4100 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4101 }
4102 }
4103
4104 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, zero) {
4105 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004106 for (size_t k = 1; k <= 5; k += 2) {
4107 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004108 GemmMicrokernelTester()
4109 .mr(1)
4110 .nr(8)
4111 .kr(1)
4112 .sr(1)
4113 .m(1)
4114 .n(8)
4115 .k(k)
4116 .ks(3)
4117 .a_offset(7)
4118 .zero_index(mz)
4119 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4120 }
4121 }
4122 }
4123
4124 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, qmin) {
4125 TEST_REQUIRES_X86_AVX2;
4126 GemmMicrokernelTester()
4127 .mr(1)
4128 .nr(8)
4129 .kr(1)
4130 .sr(1)
4131 .m(1)
4132 .n(8)
4133 .k(1)
4134 .qmin(128)
4135 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4136 }
4137
4138 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, qmax) {
4139 TEST_REQUIRES_X86_AVX2;
4140 GemmMicrokernelTester()
4141 .mr(1)
4142 .nr(8)
4143 .kr(1)
4144 .sr(1)
4145 .m(1)
4146 .n(8)
4147 .k(1)
4148 .qmax(128)
4149 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4150 }
4151
4152 TEST(F16_IGEMM_MINMAX_1X8__AVX2_BROADCAST, strided_cm) {
4153 TEST_REQUIRES_X86_AVX2;
4154 GemmMicrokernelTester()
4155 .mr(1)
4156 .nr(8)
4157 .kr(1)
4158 .sr(1)
4159 .m(1)
4160 .n(8)
4161 .k(1)
4162 .cm_stride(11)
4163 .Test(xnn_f16_igemm_minmax_ukernel_1x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4164 }
4165#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4166
4167
4168#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4169 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, k_eq_1) {
4170 TEST_REQUIRES_X86_AVX2;
4171 GemmMicrokernelTester()
4172 .mr(4)
4173 .nr(8)
4174 .kr(1)
4175 .sr(1)
4176 .m(4)
4177 .n(8)
4178 .k(1)
4179 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4180 }
4181
4182 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, strided_cn) {
4183 TEST_REQUIRES_X86_AVX2;
4184 GemmMicrokernelTester()
4185 .mr(4)
4186 .nr(8)
4187 .kr(1)
4188 .sr(1)
4189 .m(4)
4190 .n(8)
4191 .k(1)
4192 .cn_stride(11)
4193 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4194 }
4195
4196 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, k_eq_1_subtile) {
4197 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004198 for (uint32_t n = 1; n <= 8; n++) {
4199 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004200 GemmMicrokernelTester()
4201 .mr(4)
4202 .nr(8)
4203 .kr(1)
4204 .sr(1)
4205 .m(m)
4206 .n(n)
4207 .k(1)
4208 .iterations(1)
4209 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4210 }
4211 }
4212 }
4213
4214 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, k_eq_1_subtile_m) {
4215 TEST_REQUIRES_X86_AVX2;
4216 for (uint32_t m = 1; m <= 4; m++) {
4217 GemmMicrokernelTester()
4218 .mr(4)
4219 .nr(8)
4220 .kr(1)
4221 .sr(1)
4222 .m(m)
4223 .n(8)
4224 .k(1)
4225 .iterations(1)
4226 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4227 }
4228 }
4229
4230 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, k_eq_1_subtile_n) {
4231 TEST_REQUIRES_X86_AVX2;
4232 for (uint32_t n = 1; n <= 8; n++) {
4233 GemmMicrokernelTester()
4234 .mr(4)
4235 .nr(8)
4236 .kr(1)
4237 .sr(1)
4238 .m(4)
4239 .n(n)
4240 .k(1)
4241 .iterations(1)
4242 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4243 }
4244 }
4245
4246 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, k_gt_1) {
4247 TEST_REQUIRES_X86_AVX2;
4248 for (size_t k = 2; k < 10; k++) {
4249 GemmMicrokernelTester()
4250 .mr(4)
4251 .nr(8)
4252 .kr(1)
4253 .sr(1)
4254 .m(4)
4255 .n(8)
4256 .k(k)
4257 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4258 }
4259 }
4260
4261 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, k_gt_1_subtile) {
4262 TEST_REQUIRES_X86_AVX2;
4263 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004264 for (uint32_t n = 1; n <= 8; n++) {
4265 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004266 GemmMicrokernelTester()
4267 .mr(4)
4268 .nr(8)
4269 .kr(1)
4270 .sr(1)
4271 .m(m)
4272 .n(n)
4273 .k(k)
4274 .iterations(1)
4275 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4276 }
4277 }
4278 }
4279 }
4280
4281 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, n_gt_8) {
4282 TEST_REQUIRES_X86_AVX2;
4283 for (uint32_t n = 9; n < 16; n++) {
4284 for (size_t k = 1; k <= 5; k += 2) {
4285 GemmMicrokernelTester()
4286 .mr(4)
4287 .nr(8)
4288 .kr(1)
4289 .sr(1)
4290 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004291 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004292 .k(k)
4293 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4294 }
4295 }
4296 }
4297
4298 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, n_gt_8_strided_cn) {
4299 TEST_REQUIRES_X86_AVX2;
4300 for (uint32_t n = 9; n < 16; n++) {
4301 for (size_t k = 1; k <= 5; k += 2) {
4302 GemmMicrokernelTester()
4303 .mr(4)
4304 .nr(8)
4305 .kr(1)
4306 .sr(1)
4307 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004308 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004309 .k(k)
4310 .cn_stride(11)
4311 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4312 }
4313 }
4314 }
4315
4316 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, n_gt_8_subtile) {
4317 TEST_REQUIRES_X86_AVX2;
4318 for (uint32_t n = 9; n < 16; n++) {
4319 for (size_t k = 1; k <= 5; k += 2) {
4320 for (uint32_t m = 1; m <= 4; m++) {
4321 GemmMicrokernelTester()
4322 .mr(4)
4323 .nr(8)
4324 .kr(1)
4325 .sr(1)
4326 .m(m)
4327 .n(n)
4328 .k(k)
4329 .iterations(1)
4330 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4331 }
4332 }
4333 }
4334 }
4335
4336 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, n_div_8) {
4337 TEST_REQUIRES_X86_AVX2;
4338 for (uint32_t n = 16; n <= 24; n += 8) {
4339 for (size_t k = 1; k <= 5; k += 2) {
4340 GemmMicrokernelTester()
4341 .mr(4)
4342 .nr(8)
4343 .kr(1)
4344 .sr(1)
4345 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004346 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004347 .k(k)
4348 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4349 }
4350 }
4351 }
4352
4353 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, n_div_8_strided_cn) {
4354 TEST_REQUIRES_X86_AVX2;
4355 for (uint32_t n = 16; n <= 24; n += 8) {
4356 for (size_t k = 1; k <= 5; k += 2) {
4357 GemmMicrokernelTester()
4358 .mr(4)
4359 .nr(8)
4360 .kr(1)
4361 .sr(1)
4362 .m(4)
4363 .n(n)
4364 .k(k)
4365 .cn_stride(11)
4366 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4367 }
4368 }
4369 }
4370
4371 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, n_div_8_subtile) {
4372 TEST_REQUIRES_X86_AVX2;
4373 for (uint32_t n = 16; n <= 24; n += 8) {
4374 for (size_t k = 1; k <= 5; k += 2) {
4375 for (uint32_t m = 1; m <= 4; m++) {
4376 GemmMicrokernelTester()
4377 .mr(4)
4378 .nr(8)
4379 .kr(1)
4380 .sr(1)
4381 .m(m)
4382 .n(n)
4383 .k(k)
4384 .iterations(1)
4385 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4386 }
4387 }
4388 }
4389 }
4390
4391 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, small_kernel) {
4392 TEST_REQUIRES_X86_AVX2;
4393 for (size_t k = 1; k <= 5; k += 2) {
4394 GemmMicrokernelTester()
4395 .mr(4)
4396 .nr(8)
4397 .kr(1)
4398 .sr(1)
4399 .m(4)
4400 .n(8)
4401 .k(k)
4402 .ks(3)
4403 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4404 }
4405 }
4406
4407 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, small_kernel_subtile) {
4408 TEST_REQUIRES_X86_AVX2;
4409 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004410 for (uint32_t n = 1; n <= 8; n++) {
4411 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004412 GemmMicrokernelTester()
4413 .mr(4)
4414 .nr(8)
4415 .kr(1)
4416 .sr(1)
4417 .m(m)
4418 .n(n)
4419 .k(k)
4420 .ks(3)
4421 .iterations(1)
4422 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4423 }
4424 }
4425 }
4426 }
4427
4428 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, n_gt_8_small_kernel) {
4429 TEST_REQUIRES_X86_AVX2;
4430 for (uint32_t n = 9; n < 16; n++) {
4431 for (size_t k = 1; k <= 5; k += 2) {
4432 GemmMicrokernelTester()
4433 .mr(4)
4434 .nr(8)
4435 .kr(1)
4436 .sr(1)
4437 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004438 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004439 .k(k)
4440 .ks(3)
4441 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4442 }
4443 }
4444 }
4445
4446 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, n_div_8_small_kernel) {
4447 TEST_REQUIRES_X86_AVX2;
4448 for (uint32_t n = 16; n <= 24; n += 8) {
4449 for (size_t k = 1; k <= 5; k += 2) {
4450 GemmMicrokernelTester()
4451 .mr(4)
4452 .nr(8)
4453 .kr(1)
4454 .sr(1)
4455 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004456 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004457 .k(k)
4458 .ks(3)
4459 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4460 }
4461 }
4462 }
4463
4464 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, strided_cm_subtile) {
4465 TEST_REQUIRES_X86_AVX2;
4466 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004467 for (uint32_t n = 1; n <= 8; n++) {
4468 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004469 GemmMicrokernelTester()
4470 .mr(4)
4471 .nr(8)
4472 .kr(1)
4473 .sr(1)
4474 .m(m)
4475 .n(n)
4476 .k(k)
4477 .cm_stride(11)
4478 .iterations(1)
4479 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4480 }
4481 }
4482 }
4483 }
4484
4485 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, a_offset) {
4486 TEST_REQUIRES_X86_AVX2;
4487 for (size_t k = 1; k <= 5; k += 2) {
4488 GemmMicrokernelTester()
4489 .mr(4)
4490 .nr(8)
4491 .kr(1)
4492 .sr(1)
4493 .m(4)
4494 .n(8)
4495 .k(k)
4496 .ks(3)
4497 .a_offset(23)
4498 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4499 }
4500 }
4501
4502 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, zero) {
4503 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004504 for (size_t k = 1; k <= 5; k += 2) {
4505 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004506 GemmMicrokernelTester()
4507 .mr(4)
4508 .nr(8)
4509 .kr(1)
4510 .sr(1)
4511 .m(4)
4512 .n(8)
4513 .k(k)
4514 .ks(3)
4515 .a_offset(23)
4516 .zero_index(mz)
4517 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4518 }
4519 }
4520 }
4521
4522 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, qmin) {
4523 TEST_REQUIRES_X86_AVX2;
4524 GemmMicrokernelTester()
4525 .mr(4)
4526 .nr(8)
4527 .kr(1)
4528 .sr(1)
4529 .m(4)
4530 .n(8)
4531 .k(1)
4532 .qmin(128)
4533 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4534 }
4535
4536 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, qmax) {
4537 TEST_REQUIRES_X86_AVX2;
4538 GemmMicrokernelTester()
4539 .mr(4)
4540 .nr(8)
4541 .kr(1)
4542 .sr(1)
4543 .m(4)
4544 .n(8)
4545 .k(1)
4546 .qmax(128)
4547 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4548 }
4549
4550 TEST(F16_IGEMM_MINMAX_4X8__AVX2_BROADCAST, strided_cm) {
4551 TEST_REQUIRES_X86_AVX2;
4552 GemmMicrokernelTester()
4553 .mr(4)
4554 .nr(8)
4555 .kr(1)
4556 .sr(1)
4557 .m(4)
4558 .n(8)
4559 .k(1)
4560 .cm_stride(11)
4561 .Test(xnn_f16_igemm_minmax_ukernel_4x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4562 }
4563#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4564
4565
4566#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4567 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, k_eq_1) {
4568 TEST_REQUIRES_X86_AVX2;
4569 GemmMicrokernelTester()
4570 .mr(5)
4571 .nr(8)
4572 .kr(1)
4573 .sr(1)
4574 .m(5)
4575 .n(8)
4576 .k(1)
4577 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4578 }
4579
4580 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, strided_cn) {
4581 TEST_REQUIRES_X86_AVX2;
4582 GemmMicrokernelTester()
4583 .mr(5)
4584 .nr(8)
4585 .kr(1)
4586 .sr(1)
4587 .m(5)
4588 .n(8)
4589 .k(1)
4590 .cn_stride(11)
4591 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4592 }
4593
4594 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, k_eq_1_subtile) {
4595 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004596 for (uint32_t n = 1; n <= 8; n++) {
4597 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004598 GemmMicrokernelTester()
4599 .mr(5)
4600 .nr(8)
4601 .kr(1)
4602 .sr(1)
4603 .m(m)
4604 .n(n)
4605 .k(1)
4606 .iterations(1)
4607 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4608 }
4609 }
4610 }
4611
4612 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, k_eq_1_subtile_m) {
4613 TEST_REQUIRES_X86_AVX2;
4614 for (uint32_t m = 1; m <= 5; m++) {
4615 GemmMicrokernelTester()
4616 .mr(5)
4617 .nr(8)
4618 .kr(1)
4619 .sr(1)
4620 .m(m)
4621 .n(8)
4622 .k(1)
4623 .iterations(1)
4624 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4625 }
4626 }
4627
4628 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, k_eq_1_subtile_n) {
4629 TEST_REQUIRES_X86_AVX2;
4630 for (uint32_t n = 1; n <= 8; n++) {
4631 GemmMicrokernelTester()
4632 .mr(5)
4633 .nr(8)
4634 .kr(1)
4635 .sr(1)
4636 .m(5)
4637 .n(n)
4638 .k(1)
4639 .iterations(1)
4640 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4641 }
4642 }
4643
4644 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, k_gt_1) {
4645 TEST_REQUIRES_X86_AVX2;
4646 for (size_t k = 2; k < 10; k++) {
4647 GemmMicrokernelTester()
4648 .mr(5)
4649 .nr(8)
4650 .kr(1)
4651 .sr(1)
4652 .m(5)
4653 .n(8)
4654 .k(k)
4655 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4656 }
4657 }
4658
4659 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, k_gt_1_subtile) {
4660 TEST_REQUIRES_X86_AVX2;
4661 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004662 for (uint32_t n = 1; n <= 8; n++) {
4663 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004664 GemmMicrokernelTester()
4665 .mr(5)
4666 .nr(8)
4667 .kr(1)
4668 .sr(1)
4669 .m(m)
4670 .n(n)
4671 .k(k)
4672 .iterations(1)
4673 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4674 }
4675 }
4676 }
4677 }
4678
4679 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, n_gt_8) {
4680 TEST_REQUIRES_X86_AVX2;
4681 for (uint32_t n = 9; n < 16; n++) {
4682 for (size_t k = 1; k <= 5; k += 2) {
4683 GemmMicrokernelTester()
4684 .mr(5)
4685 .nr(8)
4686 .kr(1)
4687 .sr(1)
4688 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004689 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004690 .k(k)
4691 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4692 }
4693 }
4694 }
4695
4696 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, n_gt_8_strided_cn) {
4697 TEST_REQUIRES_X86_AVX2;
4698 for (uint32_t n = 9; n < 16; n++) {
4699 for (size_t k = 1; k <= 5; k += 2) {
4700 GemmMicrokernelTester()
4701 .mr(5)
4702 .nr(8)
4703 .kr(1)
4704 .sr(1)
4705 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004706 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004707 .k(k)
4708 .cn_stride(11)
4709 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4710 }
4711 }
4712 }
4713
4714 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, n_gt_8_subtile) {
4715 TEST_REQUIRES_X86_AVX2;
4716 for (uint32_t n = 9; n < 16; n++) {
4717 for (size_t k = 1; k <= 5; k += 2) {
4718 for (uint32_t m = 1; m <= 5; m++) {
4719 GemmMicrokernelTester()
4720 .mr(5)
4721 .nr(8)
4722 .kr(1)
4723 .sr(1)
4724 .m(m)
4725 .n(n)
4726 .k(k)
4727 .iterations(1)
4728 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4729 }
4730 }
4731 }
4732 }
4733
4734 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, n_div_8) {
4735 TEST_REQUIRES_X86_AVX2;
4736 for (uint32_t n = 16; n <= 24; n += 8) {
4737 for (size_t k = 1; k <= 5; k += 2) {
4738 GemmMicrokernelTester()
4739 .mr(5)
4740 .nr(8)
4741 .kr(1)
4742 .sr(1)
4743 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004744 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004745 .k(k)
4746 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4747 }
4748 }
4749 }
4750
4751 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, n_div_8_strided_cn) {
4752 TEST_REQUIRES_X86_AVX2;
4753 for (uint32_t n = 16; n <= 24; n += 8) {
4754 for (size_t k = 1; k <= 5; k += 2) {
4755 GemmMicrokernelTester()
4756 .mr(5)
4757 .nr(8)
4758 .kr(1)
4759 .sr(1)
4760 .m(5)
4761 .n(n)
4762 .k(k)
4763 .cn_stride(11)
4764 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4765 }
4766 }
4767 }
4768
4769 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, n_div_8_subtile) {
4770 TEST_REQUIRES_X86_AVX2;
4771 for (uint32_t n = 16; n <= 24; n += 8) {
4772 for (size_t k = 1; k <= 5; k += 2) {
4773 for (uint32_t m = 1; m <= 5; m++) {
4774 GemmMicrokernelTester()
4775 .mr(5)
4776 .nr(8)
4777 .kr(1)
4778 .sr(1)
4779 .m(m)
4780 .n(n)
4781 .k(k)
4782 .iterations(1)
4783 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4784 }
4785 }
4786 }
4787 }
4788
4789 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, small_kernel) {
4790 TEST_REQUIRES_X86_AVX2;
4791 for (size_t k = 1; k <= 5; k += 2) {
4792 GemmMicrokernelTester()
4793 .mr(5)
4794 .nr(8)
4795 .kr(1)
4796 .sr(1)
4797 .m(5)
4798 .n(8)
4799 .k(k)
4800 .ks(3)
4801 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4802 }
4803 }
4804
4805 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, small_kernel_subtile) {
4806 TEST_REQUIRES_X86_AVX2;
4807 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004808 for (uint32_t n = 1; n <= 8; n++) {
4809 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004810 GemmMicrokernelTester()
4811 .mr(5)
4812 .nr(8)
4813 .kr(1)
4814 .sr(1)
4815 .m(m)
4816 .n(n)
4817 .k(k)
4818 .ks(3)
4819 .iterations(1)
4820 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4821 }
4822 }
4823 }
4824 }
4825
4826 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, n_gt_8_small_kernel) {
4827 TEST_REQUIRES_X86_AVX2;
4828 for (uint32_t n = 9; n < 16; n++) {
4829 for (size_t k = 1; k <= 5; k += 2) {
4830 GemmMicrokernelTester()
4831 .mr(5)
4832 .nr(8)
4833 .kr(1)
4834 .sr(1)
4835 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004836 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004837 .k(k)
4838 .ks(3)
4839 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4840 }
4841 }
4842 }
4843
4844 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, n_div_8_small_kernel) {
4845 TEST_REQUIRES_X86_AVX2;
4846 for (uint32_t n = 16; n <= 24; n += 8) {
4847 for (size_t k = 1; k <= 5; k += 2) {
4848 GemmMicrokernelTester()
4849 .mr(5)
4850 .nr(8)
4851 .kr(1)
4852 .sr(1)
4853 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004854 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08004855 .k(k)
4856 .ks(3)
4857 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4858 }
4859 }
4860 }
4861
4862 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, strided_cm_subtile) {
4863 TEST_REQUIRES_X86_AVX2;
4864 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004865 for (uint32_t n = 1; n <= 8; n++) {
4866 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004867 GemmMicrokernelTester()
4868 .mr(5)
4869 .nr(8)
4870 .kr(1)
4871 .sr(1)
4872 .m(m)
4873 .n(n)
4874 .k(k)
4875 .cm_stride(11)
4876 .iterations(1)
4877 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4878 }
4879 }
4880 }
4881 }
4882
4883 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, a_offset) {
4884 TEST_REQUIRES_X86_AVX2;
4885 for (size_t k = 1; k <= 5; k += 2) {
4886 GemmMicrokernelTester()
4887 .mr(5)
4888 .nr(8)
4889 .kr(1)
4890 .sr(1)
4891 .m(5)
4892 .n(8)
4893 .k(k)
4894 .ks(3)
4895 .a_offset(29)
4896 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4897 }
4898 }
4899
4900 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, zero) {
4901 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004902 for (size_t k = 1; k <= 5; k += 2) {
4903 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004904 GemmMicrokernelTester()
4905 .mr(5)
4906 .nr(8)
4907 .kr(1)
4908 .sr(1)
4909 .m(5)
4910 .n(8)
4911 .k(k)
4912 .ks(3)
4913 .a_offset(29)
4914 .zero_index(mz)
4915 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4916 }
4917 }
4918 }
4919
4920 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, qmin) {
4921 TEST_REQUIRES_X86_AVX2;
4922 GemmMicrokernelTester()
4923 .mr(5)
4924 .nr(8)
4925 .kr(1)
4926 .sr(1)
4927 .m(5)
4928 .n(8)
4929 .k(1)
4930 .qmin(128)
4931 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4932 }
4933
4934 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, qmax) {
4935 TEST_REQUIRES_X86_AVX2;
4936 GemmMicrokernelTester()
4937 .mr(5)
4938 .nr(8)
4939 .kr(1)
4940 .sr(1)
4941 .m(5)
4942 .n(8)
4943 .k(1)
4944 .qmax(128)
4945 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4946 }
4947
4948 TEST(F16_IGEMM_MINMAX_5X8__AVX2_BROADCAST, strided_cm) {
4949 TEST_REQUIRES_X86_AVX2;
4950 GemmMicrokernelTester()
4951 .mr(5)
4952 .nr(8)
4953 .kr(1)
4954 .sr(1)
4955 .m(5)
4956 .n(8)
4957 .k(1)
4958 .cm_stride(11)
4959 .Test(xnn_f16_igemm_minmax_ukernel_5x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4960 }
4961#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4962
4963
4964#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4965 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, k_eq_1) {
4966 TEST_REQUIRES_X86_AVX2;
4967 GemmMicrokernelTester()
4968 .mr(6)
4969 .nr(8)
4970 .kr(1)
4971 .sr(1)
4972 .m(6)
4973 .n(8)
4974 .k(1)
4975 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4976 }
4977
4978 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, strided_cn) {
4979 TEST_REQUIRES_X86_AVX2;
4980 GemmMicrokernelTester()
4981 .mr(6)
4982 .nr(8)
4983 .kr(1)
4984 .sr(1)
4985 .m(6)
4986 .n(8)
4987 .k(1)
4988 .cn_stride(11)
4989 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
4990 }
4991
4992 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, k_eq_1_subtile) {
4993 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004994 for (uint32_t n = 1; n <= 8; n++) {
4995 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08004996 GemmMicrokernelTester()
4997 .mr(6)
4998 .nr(8)
4999 .kr(1)
5000 .sr(1)
5001 .m(m)
5002 .n(n)
5003 .k(1)
5004 .iterations(1)
5005 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5006 }
5007 }
5008 }
5009
5010 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, k_eq_1_subtile_m) {
5011 TEST_REQUIRES_X86_AVX2;
5012 for (uint32_t m = 1; m <= 6; m++) {
5013 GemmMicrokernelTester()
5014 .mr(6)
5015 .nr(8)
5016 .kr(1)
5017 .sr(1)
5018 .m(m)
5019 .n(8)
5020 .k(1)
5021 .iterations(1)
5022 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5023 }
5024 }
5025
5026 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, k_eq_1_subtile_n) {
5027 TEST_REQUIRES_X86_AVX2;
5028 for (uint32_t n = 1; n <= 8; n++) {
5029 GemmMicrokernelTester()
5030 .mr(6)
5031 .nr(8)
5032 .kr(1)
5033 .sr(1)
5034 .m(6)
5035 .n(n)
5036 .k(1)
5037 .iterations(1)
5038 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5039 }
5040 }
5041
5042 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, k_gt_1) {
5043 TEST_REQUIRES_X86_AVX2;
5044 for (size_t k = 2; k < 10; k++) {
5045 GemmMicrokernelTester()
5046 .mr(6)
5047 .nr(8)
5048 .kr(1)
5049 .sr(1)
5050 .m(6)
5051 .n(8)
5052 .k(k)
5053 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5054 }
5055 }
5056
5057 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, k_gt_1_subtile) {
5058 TEST_REQUIRES_X86_AVX2;
5059 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005060 for (uint32_t n = 1; n <= 8; n++) {
5061 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08005062 GemmMicrokernelTester()
5063 .mr(6)
5064 .nr(8)
5065 .kr(1)
5066 .sr(1)
5067 .m(m)
5068 .n(n)
5069 .k(k)
5070 .iterations(1)
5071 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5072 }
5073 }
5074 }
5075 }
5076
5077 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, n_gt_8) {
5078 TEST_REQUIRES_X86_AVX2;
5079 for (uint32_t n = 9; n < 16; n++) {
5080 for (size_t k = 1; k <= 5; k += 2) {
5081 GemmMicrokernelTester()
5082 .mr(6)
5083 .nr(8)
5084 .kr(1)
5085 .sr(1)
5086 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005087 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005088 .k(k)
5089 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5090 }
5091 }
5092 }
5093
5094 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, n_gt_8_strided_cn) {
5095 TEST_REQUIRES_X86_AVX2;
5096 for (uint32_t n = 9; n < 16; n++) {
5097 for (size_t k = 1; k <= 5; k += 2) {
5098 GemmMicrokernelTester()
5099 .mr(6)
5100 .nr(8)
5101 .kr(1)
5102 .sr(1)
5103 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005104 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005105 .k(k)
5106 .cn_stride(11)
5107 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5108 }
5109 }
5110 }
5111
5112 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, n_gt_8_subtile) {
5113 TEST_REQUIRES_X86_AVX2;
5114 for (uint32_t n = 9; n < 16; n++) {
5115 for (size_t k = 1; k <= 5; k += 2) {
5116 for (uint32_t m = 1; m <= 6; m++) {
5117 GemmMicrokernelTester()
5118 .mr(6)
5119 .nr(8)
5120 .kr(1)
5121 .sr(1)
5122 .m(m)
5123 .n(n)
5124 .k(k)
5125 .iterations(1)
5126 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5127 }
5128 }
5129 }
5130 }
5131
5132 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, n_div_8) {
5133 TEST_REQUIRES_X86_AVX2;
5134 for (uint32_t n = 16; n <= 24; n += 8) {
5135 for (size_t k = 1; k <= 5; k += 2) {
5136 GemmMicrokernelTester()
5137 .mr(6)
5138 .nr(8)
5139 .kr(1)
5140 .sr(1)
5141 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005142 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005143 .k(k)
5144 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5145 }
5146 }
5147 }
5148
5149 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, n_div_8_strided_cn) {
5150 TEST_REQUIRES_X86_AVX2;
5151 for (uint32_t n = 16; n <= 24; n += 8) {
5152 for (size_t k = 1; k <= 5; k += 2) {
5153 GemmMicrokernelTester()
5154 .mr(6)
5155 .nr(8)
5156 .kr(1)
5157 .sr(1)
5158 .m(6)
5159 .n(n)
5160 .k(k)
5161 .cn_stride(11)
5162 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5163 }
5164 }
5165 }
5166
5167 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, n_div_8_subtile) {
5168 TEST_REQUIRES_X86_AVX2;
5169 for (uint32_t n = 16; n <= 24; n += 8) {
5170 for (size_t k = 1; k <= 5; k += 2) {
5171 for (uint32_t m = 1; m <= 6; m++) {
5172 GemmMicrokernelTester()
5173 .mr(6)
5174 .nr(8)
5175 .kr(1)
5176 .sr(1)
5177 .m(m)
5178 .n(n)
5179 .k(k)
5180 .iterations(1)
5181 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5182 }
5183 }
5184 }
5185 }
5186
5187 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, small_kernel) {
5188 TEST_REQUIRES_X86_AVX2;
5189 for (size_t k = 1; k <= 5; k += 2) {
5190 GemmMicrokernelTester()
5191 .mr(6)
5192 .nr(8)
5193 .kr(1)
5194 .sr(1)
5195 .m(6)
5196 .n(8)
5197 .k(k)
5198 .ks(3)
5199 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5200 }
5201 }
5202
5203 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, small_kernel_subtile) {
5204 TEST_REQUIRES_X86_AVX2;
5205 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005206 for (uint32_t n = 1; n <= 8; n++) {
5207 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08005208 GemmMicrokernelTester()
5209 .mr(6)
5210 .nr(8)
5211 .kr(1)
5212 .sr(1)
5213 .m(m)
5214 .n(n)
5215 .k(k)
5216 .ks(3)
5217 .iterations(1)
5218 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5219 }
5220 }
5221 }
5222 }
5223
5224 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, n_gt_8_small_kernel) {
5225 TEST_REQUIRES_X86_AVX2;
5226 for (uint32_t n = 9; n < 16; n++) {
5227 for (size_t k = 1; k <= 5; k += 2) {
5228 GemmMicrokernelTester()
5229 .mr(6)
5230 .nr(8)
5231 .kr(1)
5232 .sr(1)
5233 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005234 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005235 .k(k)
5236 .ks(3)
5237 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5238 }
5239 }
5240 }
5241
5242 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, n_div_8_small_kernel) {
5243 TEST_REQUIRES_X86_AVX2;
5244 for (uint32_t n = 16; n <= 24; n += 8) {
5245 for (size_t k = 1; k <= 5; k += 2) {
5246 GemmMicrokernelTester()
5247 .mr(6)
5248 .nr(8)
5249 .kr(1)
5250 .sr(1)
5251 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005252 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005253 .k(k)
5254 .ks(3)
5255 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5256 }
5257 }
5258 }
5259
5260 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, strided_cm_subtile) {
5261 TEST_REQUIRES_X86_AVX2;
5262 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005263 for (uint32_t n = 1; n <= 8; n++) {
5264 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08005265 GemmMicrokernelTester()
5266 .mr(6)
5267 .nr(8)
5268 .kr(1)
5269 .sr(1)
5270 .m(m)
5271 .n(n)
5272 .k(k)
5273 .cm_stride(11)
5274 .iterations(1)
5275 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5276 }
5277 }
5278 }
5279 }
5280
5281 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, a_offset) {
5282 TEST_REQUIRES_X86_AVX2;
5283 for (size_t k = 1; k <= 5; k += 2) {
5284 GemmMicrokernelTester()
5285 .mr(6)
5286 .nr(8)
5287 .kr(1)
5288 .sr(1)
5289 .m(6)
5290 .n(8)
5291 .k(k)
5292 .ks(3)
5293 .a_offset(37)
5294 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5295 }
5296 }
5297
5298 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, zero) {
5299 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005300 for (size_t k = 1; k <= 5; k += 2) {
5301 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08005302 GemmMicrokernelTester()
5303 .mr(6)
5304 .nr(8)
5305 .kr(1)
5306 .sr(1)
5307 .m(6)
5308 .n(8)
5309 .k(k)
5310 .ks(3)
5311 .a_offset(37)
5312 .zero_index(mz)
5313 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5314 }
5315 }
5316 }
5317
5318 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, qmin) {
5319 TEST_REQUIRES_X86_AVX2;
5320 GemmMicrokernelTester()
5321 .mr(6)
5322 .nr(8)
5323 .kr(1)
5324 .sr(1)
5325 .m(6)
5326 .n(8)
5327 .k(1)
5328 .qmin(128)
5329 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5330 }
5331
5332 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, qmax) {
5333 TEST_REQUIRES_X86_AVX2;
5334 GemmMicrokernelTester()
5335 .mr(6)
5336 .nr(8)
5337 .kr(1)
5338 .sr(1)
5339 .m(6)
5340 .n(8)
5341 .k(1)
5342 .qmax(128)
5343 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5344 }
5345
5346 TEST(F16_IGEMM_MINMAX_6X8__AVX2_BROADCAST, strided_cm) {
5347 TEST_REQUIRES_X86_AVX2;
5348 GemmMicrokernelTester()
5349 .mr(6)
5350 .nr(8)
5351 .kr(1)
5352 .sr(1)
5353 .m(6)
5354 .n(8)
5355 .k(1)
5356 .cm_stride(11)
5357 .Test(xnn_f16_igemm_minmax_ukernel_6x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5358 }
5359#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5360
5361
5362#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5363 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, k_eq_1) {
5364 TEST_REQUIRES_X86_AVX2;
5365 GemmMicrokernelTester()
5366 .mr(7)
5367 .nr(8)
5368 .kr(1)
5369 .sr(1)
5370 .m(7)
5371 .n(8)
5372 .k(1)
5373 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5374 }
5375
5376 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, strided_cn) {
5377 TEST_REQUIRES_X86_AVX2;
5378 GemmMicrokernelTester()
5379 .mr(7)
5380 .nr(8)
5381 .kr(1)
5382 .sr(1)
5383 .m(7)
5384 .n(8)
5385 .k(1)
5386 .cn_stride(11)
5387 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5388 }
5389
5390 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, k_eq_1_subtile) {
5391 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005392 for (uint32_t n = 1; n <= 8; n++) {
5393 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08005394 GemmMicrokernelTester()
5395 .mr(7)
5396 .nr(8)
5397 .kr(1)
5398 .sr(1)
5399 .m(m)
5400 .n(n)
5401 .k(1)
5402 .iterations(1)
5403 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5404 }
5405 }
5406 }
5407
5408 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, k_eq_1_subtile_m) {
5409 TEST_REQUIRES_X86_AVX2;
5410 for (uint32_t m = 1; m <= 7; m++) {
5411 GemmMicrokernelTester()
5412 .mr(7)
5413 .nr(8)
5414 .kr(1)
5415 .sr(1)
5416 .m(m)
5417 .n(8)
5418 .k(1)
5419 .iterations(1)
5420 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5421 }
5422 }
5423
5424 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, k_eq_1_subtile_n) {
5425 TEST_REQUIRES_X86_AVX2;
5426 for (uint32_t n = 1; n <= 8; n++) {
5427 GemmMicrokernelTester()
5428 .mr(7)
5429 .nr(8)
5430 .kr(1)
5431 .sr(1)
5432 .m(7)
5433 .n(n)
5434 .k(1)
5435 .iterations(1)
5436 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5437 }
5438 }
5439
5440 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, k_gt_1) {
5441 TEST_REQUIRES_X86_AVX2;
5442 for (size_t k = 2; k < 10; k++) {
5443 GemmMicrokernelTester()
5444 .mr(7)
5445 .nr(8)
5446 .kr(1)
5447 .sr(1)
5448 .m(7)
5449 .n(8)
5450 .k(k)
5451 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5452 }
5453 }
5454
5455 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, k_gt_1_subtile) {
5456 TEST_REQUIRES_X86_AVX2;
5457 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005458 for (uint32_t n = 1; n <= 8; n++) {
5459 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08005460 GemmMicrokernelTester()
5461 .mr(7)
5462 .nr(8)
5463 .kr(1)
5464 .sr(1)
5465 .m(m)
5466 .n(n)
5467 .k(k)
5468 .iterations(1)
5469 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5470 }
5471 }
5472 }
5473 }
5474
5475 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, n_gt_8) {
5476 TEST_REQUIRES_X86_AVX2;
5477 for (uint32_t n = 9; n < 16; n++) {
5478 for (size_t k = 1; k <= 5; k += 2) {
5479 GemmMicrokernelTester()
5480 .mr(7)
5481 .nr(8)
5482 .kr(1)
5483 .sr(1)
5484 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005485 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005486 .k(k)
5487 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5488 }
5489 }
5490 }
5491
5492 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, n_gt_8_strided_cn) {
5493 TEST_REQUIRES_X86_AVX2;
5494 for (uint32_t n = 9; n < 16; n++) {
5495 for (size_t k = 1; k <= 5; k += 2) {
5496 GemmMicrokernelTester()
5497 .mr(7)
5498 .nr(8)
5499 .kr(1)
5500 .sr(1)
5501 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005502 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005503 .k(k)
5504 .cn_stride(11)
5505 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5506 }
5507 }
5508 }
5509
5510 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, n_gt_8_subtile) {
5511 TEST_REQUIRES_X86_AVX2;
5512 for (uint32_t n = 9; n < 16; n++) {
5513 for (size_t k = 1; k <= 5; k += 2) {
5514 for (uint32_t m = 1; m <= 7; m++) {
5515 GemmMicrokernelTester()
5516 .mr(7)
5517 .nr(8)
5518 .kr(1)
5519 .sr(1)
5520 .m(m)
5521 .n(n)
5522 .k(k)
5523 .iterations(1)
5524 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5525 }
5526 }
5527 }
5528 }
5529
5530 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, n_div_8) {
5531 TEST_REQUIRES_X86_AVX2;
5532 for (uint32_t n = 16; n <= 24; n += 8) {
5533 for (size_t k = 1; k <= 5; k += 2) {
5534 GemmMicrokernelTester()
5535 .mr(7)
5536 .nr(8)
5537 .kr(1)
5538 .sr(1)
5539 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005540 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005541 .k(k)
5542 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5543 }
5544 }
5545 }
5546
5547 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, n_div_8_strided_cn) {
5548 TEST_REQUIRES_X86_AVX2;
5549 for (uint32_t n = 16; n <= 24; n += 8) {
5550 for (size_t k = 1; k <= 5; k += 2) {
5551 GemmMicrokernelTester()
5552 .mr(7)
5553 .nr(8)
5554 .kr(1)
5555 .sr(1)
5556 .m(7)
5557 .n(n)
5558 .k(k)
5559 .cn_stride(11)
5560 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5561 }
5562 }
5563 }
5564
5565 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, n_div_8_subtile) {
5566 TEST_REQUIRES_X86_AVX2;
5567 for (uint32_t n = 16; n <= 24; n += 8) {
5568 for (size_t k = 1; k <= 5; k += 2) {
5569 for (uint32_t m = 1; m <= 7; m++) {
5570 GemmMicrokernelTester()
5571 .mr(7)
5572 .nr(8)
5573 .kr(1)
5574 .sr(1)
5575 .m(m)
5576 .n(n)
5577 .k(k)
5578 .iterations(1)
5579 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5580 }
5581 }
5582 }
5583 }
5584
5585 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, small_kernel) {
5586 TEST_REQUIRES_X86_AVX2;
5587 for (size_t k = 1; k <= 5; k += 2) {
5588 GemmMicrokernelTester()
5589 .mr(7)
5590 .nr(8)
5591 .kr(1)
5592 .sr(1)
5593 .m(7)
5594 .n(8)
5595 .k(k)
5596 .ks(3)
5597 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5598 }
5599 }
5600
5601 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, small_kernel_subtile) {
5602 TEST_REQUIRES_X86_AVX2;
5603 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005604 for (uint32_t n = 1; n <= 8; n++) {
5605 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08005606 GemmMicrokernelTester()
5607 .mr(7)
5608 .nr(8)
5609 .kr(1)
5610 .sr(1)
5611 .m(m)
5612 .n(n)
5613 .k(k)
5614 .ks(3)
5615 .iterations(1)
5616 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5617 }
5618 }
5619 }
5620 }
5621
5622 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, n_gt_8_small_kernel) {
5623 TEST_REQUIRES_X86_AVX2;
5624 for (uint32_t n = 9; n < 16; n++) {
5625 for (size_t k = 1; k <= 5; k += 2) {
5626 GemmMicrokernelTester()
5627 .mr(7)
5628 .nr(8)
5629 .kr(1)
5630 .sr(1)
5631 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005632 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005633 .k(k)
5634 .ks(3)
5635 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5636 }
5637 }
5638 }
5639
5640 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, n_div_8_small_kernel) {
5641 TEST_REQUIRES_X86_AVX2;
5642 for (uint32_t n = 16; n <= 24; n += 8) {
5643 for (size_t k = 1; k <= 5; k += 2) {
5644 GemmMicrokernelTester()
5645 .mr(7)
5646 .nr(8)
5647 .kr(1)
5648 .sr(1)
5649 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005650 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005651 .k(k)
5652 .ks(3)
5653 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5654 }
5655 }
5656 }
5657
5658 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, strided_cm_subtile) {
5659 TEST_REQUIRES_X86_AVX2;
5660 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005661 for (uint32_t n = 1; n <= 8; n++) {
5662 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08005663 GemmMicrokernelTester()
5664 .mr(7)
5665 .nr(8)
5666 .kr(1)
5667 .sr(1)
5668 .m(m)
5669 .n(n)
5670 .k(k)
5671 .cm_stride(11)
5672 .iterations(1)
5673 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5674 }
5675 }
5676 }
5677 }
5678
5679 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, a_offset) {
5680 TEST_REQUIRES_X86_AVX2;
5681 for (size_t k = 1; k <= 5; k += 2) {
5682 GemmMicrokernelTester()
5683 .mr(7)
5684 .nr(8)
5685 .kr(1)
5686 .sr(1)
5687 .m(7)
5688 .n(8)
5689 .k(k)
5690 .ks(3)
5691 .a_offset(37)
5692 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5693 }
5694 }
5695
5696 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, zero) {
5697 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005698 for (size_t k = 1; k <= 5; k += 2) {
5699 for (uint32_t mz = 0; mz < 7; mz++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08005700 GemmMicrokernelTester()
5701 .mr(7)
5702 .nr(8)
5703 .kr(1)
5704 .sr(1)
5705 .m(7)
5706 .n(8)
5707 .k(k)
5708 .ks(3)
5709 .a_offset(37)
5710 .zero_index(mz)
5711 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5712 }
5713 }
5714 }
5715
5716 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, qmin) {
5717 TEST_REQUIRES_X86_AVX2;
5718 GemmMicrokernelTester()
5719 .mr(7)
5720 .nr(8)
5721 .kr(1)
5722 .sr(1)
5723 .m(7)
5724 .n(8)
5725 .k(1)
5726 .qmin(128)
5727 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5728 }
5729
5730 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, qmax) {
5731 TEST_REQUIRES_X86_AVX2;
5732 GemmMicrokernelTester()
5733 .mr(7)
5734 .nr(8)
5735 .kr(1)
5736 .sr(1)
5737 .m(7)
5738 .n(8)
5739 .k(1)
5740 .qmax(128)
5741 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5742 }
5743
5744 TEST(F16_IGEMM_MINMAX_7X8__AVX2_BROADCAST, strided_cm) {
5745 TEST_REQUIRES_X86_AVX2;
5746 GemmMicrokernelTester()
5747 .mr(7)
5748 .nr(8)
5749 .kr(1)
5750 .sr(1)
5751 .m(7)
5752 .n(8)
5753 .k(1)
5754 .cm_stride(11)
5755 .Test(xnn_f16_igemm_minmax_ukernel_7x8__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5756 }
5757#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5758
5759
5760#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5761 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, k_eq_1) {
5762 TEST_REQUIRES_X86_AVX2;
5763 GemmMicrokernelTester()
5764 .mr(1)
5765 .nr(16)
5766 .kr(1)
5767 .sr(1)
5768 .m(1)
5769 .n(16)
5770 .k(1)
5771 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5772 }
5773
5774 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, strided_cn) {
5775 TEST_REQUIRES_X86_AVX2;
5776 GemmMicrokernelTester()
5777 .mr(1)
5778 .nr(16)
5779 .kr(1)
5780 .sr(1)
5781 .m(1)
5782 .n(16)
5783 .k(1)
5784 .cn_stride(19)
5785 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5786 }
5787
5788 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, k_eq_1_subtile) {
5789 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005790 for (uint32_t n = 1; n <= 16; n++) {
5791 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08005792 GemmMicrokernelTester()
5793 .mr(1)
5794 .nr(16)
5795 .kr(1)
5796 .sr(1)
5797 .m(m)
5798 .n(n)
5799 .k(1)
5800 .iterations(1)
5801 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5802 }
5803 }
5804 }
5805
5806 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, k_eq_1_subtile_m) {
5807 TEST_REQUIRES_X86_AVX2;
5808 for (uint32_t m = 1; m <= 1; m++) {
5809 GemmMicrokernelTester()
5810 .mr(1)
5811 .nr(16)
5812 .kr(1)
5813 .sr(1)
5814 .m(m)
5815 .n(16)
5816 .k(1)
5817 .iterations(1)
5818 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5819 }
5820 }
5821
5822 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, k_eq_1_subtile_n) {
5823 TEST_REQUIRES_X86_AVX2;
5824 for (uint32_t n = 1; n <= 16; n++) {
5825 GemmMicrokernelTester()
5826 .mr(1)
5827 .nr(16)
5828 .kr(1)
5829 .sr(1)
5830 .m(1)
5831 .n(n)
5832 .k(1)
5833 .iterations(1)
5834 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5835 }
5836 }
5837
5838 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, k_gt_1) {
5839 TEST_REQUIRES_X86_AVX2;
5840 for (size_t k = 2; k < 10; k++) {
5841 GemmMicrokernelTester()
5842 .mr(1)
5843 .nr(16)
5844 .kr(1)
5845 .sr(1)
5846 .m(1)
5847 .n(16)
5848 .k(k)
5849 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5850 }
5851 }
5852
5853 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, k_gt_1_subtile) {
5854 TEST_REQUIRES_X86_AVX2;
5855 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005856 for (uint32_t n = 1; n <= 16; n++) {
5857 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08005858 GemmMicrokernelTester()
5859 .mr(1)
5860 .nr(16)
5861 .kr(1)
5862 .sr(1)
5863 .m(m)
5864 .n(n)
5865 .k(k)
5866 .iterations(1)
5867 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5868 }
5869 }
5870 }
5871 }
5872
5873 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, n_gt_16) {
5874 TEST_REQUIRES_X86_AVX2;
5875 for (uint32_t n = 17; n < 32; n++) {
5876 for (size_t k = 1; k <= 5; k += 2) {
5877 GemmMicrokernelTester()
5878 .mr(1)
5879 .nr(16)
5880 .kr(1)
5881 .sr(1)
5882 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005883 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005884 .k(k)
5885 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5886 }
5887 }
5888 }
5889
5890 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, n_gt_16_strided_cn) {
5891 TEST_REQUIRES_X86_AVX2;
5892 for (uint32_t n = 17; n < 32; n++) {
5893 for (size_t k = 1; k <= 5; k += 2) {
5894 GemmMicrokernelTester()
5895 .mr(1)
5896 .nr(16)
5897 .kr(1)
5898 .sr(1)
5899 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005900 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005901 .k(k)
5902 .cn_stride(19)
5903 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5904 }
5905 }
5906 }
5907
5908 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, n_gt_16_subtile) {
5909 TEST_REQUIRES_X86_AVX2;
5910 for (uint32_t n = 17; n < 32; n++) {
5911 for (size_t k = 1; k <= 5; k += 2) {
5912 for (uint32_t m = 1; m <= 1; m++) {
5913 GemmMicrokernelTester()
5914 .mr(1)
5915 .nr(16)
5916 .kr(1)
5917 .sr(1)
5918 .m(m)
5919 .n(n)
5920 .k(k)
5921 .iterations(1)
5922 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5923 }
5924 }
5925 }
5926 }
5927
5928 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, n_div_16) {
5929 TEST_REQUIRES_X86_AVX2;
5930 for (uint32_t n = 32; n <= 48; n += 16) {
5931 for (size_t k = 1; k <= 5; k += 2) {
5932 GemmMicrokernelTester()
5933 .mr(1)
5934 .nr(16)
5935 .kr(1)
5936 .sr(1)
5937 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005938 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08005939 .k(k)
5940 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5941 }
5942 }
5943 }
5944
5945 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, n_div_16_strided_cn) {
5946 TEST_REQUIRES_X86_AVX2;
5947 for (uint32_t n = 32; n <= 48; n += 16) {
5948 for (size_t k = 1; k <= 5; k += 2) {
5949 GemmMicrokernelTester()
5950 .mr(1)
5951 .nr(16)
5952 .kr(1)
5953 .sr(1)
5954 .m(1)
5955 .n(n)
5956 .k(k)
5957 .cn_stride(19)
5958 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5959 }
5960 }
5961 }
5962
5963 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, n_div_16_subtile) {
5964 TEST_REQUIRES_X86_AVX2;
5965 for (uint32_t n = 32; n <= 48; n += 16) {
5966 for (size_t k = 1; k <= 5; k += 2) {
5967 for (uint32_t m = 1; m <= 1; m++) {
5968 GemmMicrokernelTester()
5969 .mr(1)
5970 .nr(16)
5971 .kr(1)
5972 .sr(1)
5973 .m(m)
5974 .n(n)
5975 .k(k)
5976 .iterations(1)
5977 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5978 }
5979 }
5980 }
5981 }
5982
5983 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, small_kernel) {
5984 TEST_REQUIRES_X86_AVX2;
5985 for (size_t k = 1; k <= 5; k += 2) {
5986 GemmMicrokernelTester()
5987 .mr(1)
5988 .nr(16)
5989 .kr(1)
5990 .sr(1)
5991 .m(1)
5992 .n(16)
5993 .k(k)
5994 .ks(3)
5995 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
5996 }
5997 }
5998
5999 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, small_kernel_subtile) {
6000 TEST_REQUIRES_X86_AVX2;
6001 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006002 for (uint32_t n = 1; n <= 16; n++) {
6003 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006004 GemmMicrokernelTester()
6005 .mr(1)
6006 .nr(16)
6007 .kr(1)
6008 .sr(1)
6009 .m(m)
6010 .n(n)
6011 .k(k)
6012 .ks(3)
6013 .iterations(1)
6014 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6015 }
6016 }
6017 }
6018 }
6019
6020 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, n_gt_16_small_kernel) {
6021 TEST_REQUIRES_X86_AVX2;
6022 for (uint32_t n = 17; n < 32; n++) {
6023 for (size_t k = 1; k <= 5; k += 2) {
6024 GemmMicrokernelTester()
6025 .mr(1)
6026 .nr(16)
6027 .kr(1)
6028 .sr(1)
6029 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006030 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006031 .k(k)
6032 .ks(3)
6033 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6034 }
6035 }
6036 }
6037
6038 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, n_div_16_small_kernel) {
6039 TEST_REQUIRES_X86_AVX2;
6040 for (uint32_t n = 32; n <= 48; n += 16) {
6041 for (size_t k = 1; k <= 5; k += 2) {
6042 GemmMicrokernelTester()
6043 .mr(1)
6044 .nr(16)
6045 .kr(1)
6046 .sr(1)
6047 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006048 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006049 .k(k)
6050 .ks(3)
6051 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6052 }
6053 }
6054 }
6055
6056 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, strided_cm_subtile) {
6057 TEST_REQUIRES_X86_AVX2;
6058 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006059 for (uint32_t n = 1; n <= 16; n++) {
6060 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006061 GemmMicrokernelTester()
6062 .mr(1)
6063 .nr(16)
6064 .kr(1)
6065 .sr(1)
6066 .m(m)
6067 .n(n)
6068 .k(k)
6069 .cm_stride(19)
6070 .iterations(1)
6071 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6072 }
6073 }
6074 }
6075 }
6076
6077 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, a_offset) {
6078 TEST_REQUIRES_X86_AVX2;
6079 for (size_t k = 1; k <= 5; k += 2) {
6080 GemmMicrokernelTester()
6081 .mr(1)
6082 .nr(16)
6083 .kr(1)
6084 .sr(1)
6085 .m(1)
6086 .n(16)
6087 .k(k)
6088 .ks(3)
6089 .a_offset(7)
6090 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6091 }
6092 }
6093
6094 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, zero) {
6095 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006096 for (size_t k = 1; k <= 5; k += 2) {
6097 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006098 GemmMicrokernelTester()
6099 .mr(1)
6100 .nr(16)
6101 .kr(1)
6102 .sr(1)
6103 .m(1)
6104 .n(16)
6105 .k(k)
6106 .ks(3)
6107 .a_offset(7)
6108 .zero_index(mz)
6109 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6110 }
6111 }
6112 }
6113
6114 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, qmin) {
6115 TEST_REQUIRES_X86_AVX2;
6116 GemmMicrokernelTester()
6117 .mr(1)
6118 .nr(16)
6119 .kr(1)
6120 .sr(1)
6121 .m(1)
6122 .n(16)
6123 .k(1)
6124 .qmin(128)
6125 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6126 }
6127
6128 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, qmax) {
6129 TEST_REQUIRES_X86_AVX2;
6130 GemmMicrokernelTester()
6131 .mr(1)
6132 .nr(16)
6133 .kr(1)
6134 .sr(1)
6135 .m(1)
6136 .n(16)
6137 .k(1)
6138 .qmax(128)
6139 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6140 }
6141
6142 TEST(F16_IGEMM_MINMAX_1X16__AVX2_BROADCAST, strided_cm) {
6143 TEST_REQUIRES_X86_AVX2;
6144 GemmMicrokernelTester()
6145 .mr(1)
6146 .nr(16)
6147 .kr(1)
6148 .sr(1)
6149 .m(1)
6150 .n(16)
6151 .k(1)
6152 .cm_stride(19)
6153 .Test(xnn_f16_igemm_minmax_ukernel_1x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6154 }
6155#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6156
6157
6158#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6159 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, k_eq_1) {
6160 TEST_REQUIRES_X86_AVX2;
6161 GemmMicrokernelTester()
6162 .mr(3)
6163 .nr(16)
6164 .kr(1)
6165 .sr(1)
6166 .m(3)
6167 .n(16)
6168 .k(1)
6169 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6170 }
6171
6172 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, strided_cn) {
6173 TEST_REQUIRES_X86_AVX2;
6174 GemmMicrokernelTester()
6175 .mr(3)
6176 .nr(16)
6177 .kr(1)
6178 .sr(1)
6179 .m(3)
6180 .n(16)
6181 .k(1)
6182 .cn_stride(19)
6183 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6184 }
6185
6186 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, k_eq_1_subtile) {
6187 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006188 for (uint32_t n = 1; n <= 16; n++) {
6189 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006190 GemmMicrokernelTester()
6191 .mr(3)
6192 .nr(16)
6193 .kr(1)
6194 .sr(1)
6195 .m(m)
6196 .n(n)
6197 .k(1)
6198 .iterations(1)
6199 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6200 }
6201 }
6202 }
6203
6204 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, k_eq_1_subtile_m) {
6205 TEST_REQUIRES_X86_AVX2;
6206 for (uint32_t m = 1; m <= 3; m++) {
6207 GemmMicrokernelTester()
6208 .mr(3)
6209 .nr(16)
6210 .kr(1)
6211 .sr(1)
6212 .m(m)
6213 .n(16)
6214 .k(1)
6215 .iterations(1)
6216 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6217 }
6218 }
6219
6220 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, k_eq_1_subtile_n) {
6221 TEST_REQUIRES_X86_AVX2;
6222 for (uint32_t n = 1; n <= 16; n++) {
6223 GemmMicrokernelTester()
6224 .mr(3)
6225 .nr(16)
6226 .kr(1)
6227 .sr(1)
6228 .m(3)
6229 .n(n)
6230 .k(1)
6231 .iterations(1)
6232 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6233 }
6234 }
6235
6236 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, k_gt_1) {
6237 TEST_REQUIRES_X86_AVX2;
6238 for (size_t k = 2; k < 10; k++) {
6239 GemmMicrokernelTester()
6240 .mr(3)
6241 .nr(16)
6242 .kr(1)
6243 .sr(1)
6244 .m(3)
6245 .n(16)
6246 .k(k)
6247 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6248 }
6249 }
6250
6251 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, k_gt_1_subtile) {
6252 TEST_REQUIRES_X86_AVX2;
6253 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006254 for (uint32_t n = 1; n <= 16; n++) {
6255 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006256 GemmMicrokernelTester()
6257 .mr(3)
6258 .nr(16)
6259 .kr(1)
6260 .sr(1)
6261 .m(m)
6262 .n(n)
6263 .k(k)
6264 .iterations(1)
6265 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6266 }
6267 }
6268 }
6269 }
6270
6271 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, n_gt_16) {
6272 TEST_REQUIRES_X86_AVX2;
6273 for (uint32_t n = 17; n < 32; n++) {
6274 for (size_t k = 1; k <= 5; k += 2) {
6275 GemmMicrokernelTester()
6276 .mr(3)
6277 .nr(16)
6278 .kr(1)
6279 .sr(1)
6280 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006281 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006282 .k(k)
6283 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6284 }
6285 }
6286 }
6287
6288 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, n_gt_16_strided_cn) {
6289 TEST_REQUIRES_X86_AVX2;
6290 for (uint32_t n = 17; n < 32; n++) {
6291 for (size_t k = 1; k <= 5; k += 2) {
6292 GemmMicrokernelTester()
6293 .mr(3)
6294 .nr(16)
6295 .kr(1)
6296 .sr(1)
6297 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006298 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006299 .k(k)
6300 .cn_stride(19)
6301 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6302 }
6303 }
6304 }
6305
6306 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, n_gt_16_subtile) {
6307 TEST_REQUIRES_X86_AVX2;
6308 for (uint32_t n = 17; n < 32; n++) {
6309 for (size_t k = 1; k <= 5; k += 2) {
6310 for (uint32_t m = 1; m <= 3; m++) {
6311 GemmMicrokernelTester()
6312 .mr(3)
6313 .nr(16)
6314 .kr(1)
6315 .sr(1)
6316 .m(m)
6317 .n(n)
6318 .k(k)
6319 .iterations(1)
6320 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6321 }
6322 }
6323 }
6324 }
6325
6326 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, n_div_16) {
6327 TEST_REQUIRES_X86_AVX2;
6328 for (uint32_t n = 32; n <= 48; n += 16) {
6329 for (size_t k = 1; k <= 5; k += 2) {
6330 GemmMicrokernelTester()
6331 .mr(3)
6332 .nr(16)
6333 .kr(1)
6334 .sr(1)
6335 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006336 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006337 .k(k)
6338 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6339 }
6340 }
6341 }
6342
6343 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, n_div_16_strided_cn) {
6344 TEST_REQUIRES_X86_AVX2;
6345 for (uint32_t n = 32; n <= 48; n += 16) {
6346 for (size_t k = 1; k <= 5; k += 2) {
6347 GemmMicrokernelTester()
6348 .mr(3)
6349 .nr(16)
6350 .kr(1)
6351 .sr(1)
6352 .m(3)
6353 .n(n)
6354 .k(k)
6355 .cn_stride(19)
6356 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6357 }
6358 }
6359 }
6360
6361 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, n_div_16_subtile) {
6362 TEST_REQUIRES_X86_AVX2;
6363 for (uint32_t n = 32; n <= 48; n += 16) {
6364 for (size_t k = 1; k <= 5; k += 2) {
6365 for (uint32_t m = 1; m <= 3; m++) {
6366 GemmMicrokernelTester()
6367 .mr(3)
6368 .nr(16)
6369 .kr(1)
6370 .sr(1)
6371 .m(m)
6372 .n(n)
6373 .k(k)
6374 .iterations(1)
6375 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6376 }
6377 }
6378 }
6379 }
6380
6381 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, small_kernel) {
6382 TEST_REQUIRES_X86_AVX2;
6383 for (size_t k = 1; k <= 5; k += 2) {
6384 GemmMicrokernelTester()
6385 .mr(3)
6386 .nr(16)
6387 .kr(1)
6388 .sr(1)
6389 .m(3)
6390 .n(16)
6391 .k(k)
6392 .ks(3)
6393 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6394 }
6395 }
6396
6397 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, small_kernel_subtile) {
6398 TEST_REQUIRES_X86_AVX2;
6399 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006400 for (uint32_t n = 1; n <= 16; n++) {
6401 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006402 GemmMicrokernelTester()
6403 .mr(3)
6404 .nr(16)
6405 .kr(1)
6406 .sr(1)
6407 .m(m)
6408 .n(n)
6409 .k(k)
6410 .ks(3)
6411 .iterations(1)
6412 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6413 }
6414 }
6415 }
6416 }
6417
6418 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, n_gt_16_small_kernel) {
6419 TEST_REQUIRES_X86_AVX2;
6420 for (uint32_t n = 17; n < 32; n++) {
6421 for (size_t k = 1; k <= 5; k += 2) {
6422 GemmMicrokernelTester()
6423 .mr(3)
6424 .nr(16)
6425 .kr(1)
6426 .sr(1)
6427 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006428 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006429 .k(k)
6430 .ks(3)
6431 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6432 }
6433 }
6434 }
6435
6436 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, n_div_16_small_kernel) {
6437 TEST_REQUIRES_X86_AVX2;
6438 for (uint32_t n = 32; n <= 48; n += 16) {
6439 for (size_t k = 1; k <= 5; k += 2) {
6440 GemmMicrokernelTester()
6441 .mr(3)
6442 .nr(16)
6443 .kr(1)
6444 .sr(1)
6445 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006446 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006447 .k(k)
6448 .ks(3)
6449 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6450 }
6451 }
6452 }
6453
6454 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, strided_cm_subtile) {
6455 TEST_REQUIRES_X86_AVX2;
6456 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006457 for (uint32_t n = 1; n <= 16; n++) {
6458 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006459 GemmMicrokernelTester()
6460 .mr(3)
6461 .nr(16)
6462 .kr(1)
6463 .sr(1)
6464 .m(m)
6465 .n(n)
6466 .k(k)
6467 .cm_stride(19)
6468 .iterations(1)
6469 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6470 }
6471 }
6472 }
6473 }
6474
6475 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, a_offset) {
6476 TEST_REQUIRES_X86_AVX2;
6477 for (size_t k = 1; k <= 5; k += 2) {
6478 GemmMicrokernelTester()
6479 .mr(3)
6480 .nr(16)
6481 .kr(1)
6482 .sr(1)
6483 .m(3)
6484 .n(16)
6485 .k(k)
6486 .ks(3)
6487 .a_offset(17)
6488 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6489 }
6490 }
6491
6492 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, zero) {
6493 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006494 for (size_t k = 1; k <= 5; k += 2) {
6495 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006496 GemmMicrokernelTester()
6497 .mr(3)
6498 .nr(16)
6499 .kr(1)
6500 .sr(1)
6501 .m(3)
6502 .n(16)
6503 .k(k)
6504 .ks(3)
6505 .a_offset(17)
6506 .zero_index(mz)
6507 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6508 }
6509 }
6510 }
6511
6512 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, qmin) {
6513 TEST_REQUIRES_X86_AVX2;
6514 GemmMicrokernelTester()
6515 .mr(3)
6516 .nr(16)
6517 .kr(1)
6518 .sr(1)
6519 .m(3)
6520 .n(16)
6521 .k(1)
6522 .qmin(128)
6523 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6524 }
6525
6526 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, qmax) {
6527 TEST_REQUIRES_X86_AVX2;
6528 GemmMicrokernelTester()
6529 .mr(3)
6530 .nr(16)
6531 .kr(1)
6532 .sr(1)
6533 .m(3)
6534 .n(16)
6535 .k(1)
6536 .qmax(128)
6537 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6538 }
6539
6540 TEST(F16_IGEMM_MINMAX_3X16__AVX2_BROADCAST, strided_cm) {
6541 TEST_REQUIRES_X86_AVX2;
6542 GemmMicrokernelTester()
6543 .mr(3)
6544 .nr(16)
6545 .kr(1)
6546 .sr(1)
6547 .m(3)
6548 .n(16)
6549 .k(1)
6550 .cm_stride(19)
6551 .Test(xnn_f16_igemm_minmax_ukernel_3x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6552 }
6553#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6554
6555
6556#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6557 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, k_eq_1) {
6558 TEST_REQUIRES_X86_AVX2;
6559 GemmMicrokernelTester()
6560 .mr(4)
6561 .nr(16)
6562 .kr(1)
6563 .sr(1)
6564 .m(4)
6565 .n(16)
6566 .k(1)
6567 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6568 }
6569
6570 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, strided_cn) {
6571 TEST_REQUIRES_X86_AVX2;
6572 GemmMicrokernelTester()
6573 .mr(4)
6574 .nr(16)
6575 .kr(1)
6576 .sr(1)
6577 .m(4)
6578 .n(16)
6579 .k(1)
6580 .cn_stride(19)
6581 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6582 }
6583
6584 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, k_eq_1_subtile) {
6585 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006586 for (uint32_t n = 1; n <= 16; n++) {
6587 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006588 GemmMicrokernelTester()
6589 .mr(4)
6590 .nr(16)
6591 .kr(1)
6592 .sr(1)
6593 .m(m)
6594 .n(n)
6595 .k(1)
6596 .iterations(1)
6597 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6598 }
6599 }
6600 }
6601
6602 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, k_eq_1_subtile_m) {
6603 TEST_REQUIRES_X86_AVX2;
6604 for (uint32_t m = 1; m <= 4; m++) {
6605 GemmMicrokernelTester()
6606 .mr(4)
6607 .nr(16)
6608 .kr(1)
6609 .sr(1)
6610 .m(m)
6611 .n(16)
6612 .k(1)
6613 .iterations(1)
6614 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6615 }
6616 }
6617
6618 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, k_eq_1_subtile_n) {
6619 TEST_REQUIRES_X86_AVX2;
6620 for (uint32_t n = 1; n <= 16; n++) {
6621 GemmMicrokernelTester()
6622 .mr(4)
6623 .nr(16)
6624 .kr(1)
6625 .sr(1)
6626 .m(4)
6627 .n(n)
6628 .k(1)
6629 .iterations(1)
6630 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6631 }
6632 }
6633
6634 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, k_gt_1) {
6635 TEST_REQUIRES_X86_AVX2;
6636 for (size_t k = 2; k < 10; k++) {
6637 GemmMicrokernelTester()
6638 .mr(4)
6639 .nr(16)
6640 .kr(1)
6641 .sr(1)
6642 .m(4)
6643 .n(16)
6644 .k(k)
6645 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6646 }
6647 }
6648
6649 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, k_gt_1_subtile) {
6650 TEST_REQUIRES_X86_AVX2;
6651 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006652 for (uint32_t n = 1; n <= 16; n++) {
6653 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006654 GemmMicrokernelTester()
6655 .mr(4)
6656 .nr(16)
6657 .kr(1)
6658 .sr(1)
6659 .m(m)
6660 .n(n)
6661 .k(k)
6662 .iterations(1)
6663 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6664 }
6665 }
6666 }
6667 }
6668
6669 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, n_gt_16) {
6670 TEST_REQUIRES_X86_AVX2;
6671 for (uint32_t n = 17; n < 32; n++) {
6672 for (size_t k = 1; k <= 5; k += 2) {
6673 GemmMicrokernelTester()
6674 .mr(4)
6675 .nr(16)
6676 .kr(1)
6677 .sr(1)
6678 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006679 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006680 .k(k)
6681 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6682 }
6683 }
6684 }
6685
6686 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, n_gt_16_strided_cn) {
6687 TEST_REQUIRES_X86_AVX2;
6688 for (uint32_t n = 17; n < 32; n++) {
6689 for (size_t k = 1; k <= 5; k += 2) {
6690 GemmMicrokernelTester()
6691 .mr(4)
6692 .nr(16)
6693 .kr(1)
6694 .sr(1)
6695 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006696 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006697 .k(k)
6698 .cn_stride(19)
6699 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6700 }
6701 }
6702 }
6703
6704 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, n_gt_16_subtile) {
6705 TEST_REQUIRES_X86_AVX2;
6706 for (uint32_t n = 17; n < 32; n++) {
6707 for (size_t k = 1; k <= 5; k += 2) {
6708 for (uint32_t m = 1; m <= 4; m++) {
6709 GemmMicrokernelTester()
6710 .mr(4)
6711 .nr(16)
6712 .kr(1)
6713 .sr(1)
6714 .m(m)
6715 .n(n)
6716 .k(k)
6717 .iterations(1)
6718 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6719 }
6720 }
6721 }
6722 }
6723
6724 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, n_div_16) {
6725 TEST_REQUIRES_X86_AVX2;
6726 for (uint32_t n = 32; n <= 48; n += 16) {
6727 for (size_t k = 1; k <= 5; k += 2) {
6728 GemmMicrokernelTester()
6729 .mr(4)
6730 .nr(16)
6731 .kr(1)
6732 .sr(1)
6733 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006734 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006735 .k(k)
6736 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6737 }
6738 }
6739 }
6740
6741 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, n_div_16_strided_cn) {
6742 TEST_REQUIRES_X86_AVX2;
6743 for (uint32_t n = 32; n <= 48; n += 16) {
6744 for (size_t k = 1; k <= 5; k += 2) {
6745 GemmMicrokernelTester()
6746 .mr(4)
6747 .nr(16)
6748 .kr(1)
6749 .sr(1)
6750 .m(4)
6751 .n(n)
6752 .k(k)
6753 .cn_stride(19)
6754 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6755 }
6756 }
6757 }
6758
6759 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, n_div_16_subtile) {
6760 TEST_REQUIRES_X86_AVX2;
6761 for (uint32_t n = 32; n <= 48; n += 16) {
6762 for (size_t k = 1; k <= 5; k += 2) {
6763 for (uint32_t m = 1; m <= 4; m++) {
6764 GemmMicrokernelTester()
6765 .mr(4)
6766 .nr(16)
6767 .kr(1)
6768 .sr(1)
6769 .m(m)
6770 .n(n)
6771 .k(k)
6772 .iterations(1)
6773 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6774 }
6775 }
6776 }
6777 }
6778
6779 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, small_kernel) {
6780 TEST_REQUIRES_X86_AVX2;
6781 for (size_t k = 1; k <= 5; k += 2) {
6782 GemmMicrokernelTester()
6783 .mr(4)
6784 .nr(16)
6785 .kr(1)
6786 .sr(1)
6787 .m(4)
6788 .n(16)
6789 .k(k)
6790 .ks(3)
6791 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6792 }
6793 }
6794
6795 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, small_kernel_subtile) {
6796 TEST_REQUIRES_X86_AVX2;
6797 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006798 for (uint32_t n = 1; n <= 16; n++) {
6799 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006800 GemmMicrokernelTester()
6801 .mr(4)
6802 .nr(16)
6803 .kr(1)
6804 .sr(1)
6805 .m(m)
6806 .n(n)
6807 .k(k)
6808 .ks(3)
6809 .iterations(1)
6810 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6811 }
6812 }
6813 }
6814 }
6815
6816 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, n_gt_16_small_kernel) {
6817 TEST_REQUIRES_X86_AVX2;
6818 for (uint32_t n = 17; n < 32; n++) {
6819 for (size_t k = 1; k <= 5; k += 2) {
6820 GemmMicrokernelTester()
6821 .mr(4)
6822 .nr(16)
6823 .kr(1)
6824 .sr(1)
6825 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006826 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006827 .k(k)
6828 .ks(3)
6829 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6830 }
6831 }
6832 }
6833
6834 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, n_div_16_small_kernel) {
6835 TEST_REQUIRES_X86_AVX2;
6836 for (uint32_t n = 32; n <= 48; n += 16) {
6837 for (size_t k = 1; k <= 5; k += 2) {
6838 GemmMicrokernelTester()
6839 .mr(4)
6840 .nr(16)
6841 .kr(1)
6842 .sr(1)
6843 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006844 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08006845 .k(k)
6846 .ks(3)
6847 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6848 }
6849 }
6850 }
6851
6852 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, strided_cm_subtile) {
6853 TEST_REQUIRES_X86_AVX2;
6854 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006855 for (uint32_t n = 1; n <= 16; n++) {
6856 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006857 GemmMicrokernelTester()
6858 .mr(4)
6859 .nr(16)
6860 .kr(1)
6861 .sr(1)
6862 .m(m)
6863 .n(n)
6864 .k(k)
6865 .cm_stride(19)
6866 .iterations(1)
6867 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6868 }
6869 }
6870 }
6871 }
6872
6873 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, a_offset) {
6874 TEST_REQUIRES_X86_AVX2;
6875 for (size_t k = 1; k <= 5; k += 2) {
6876 GemmMicrokernelTester()
6877 .mr(4)
6878 .nr(16)
6879 .kr(1)
6880 .sr(1)
6881 .m(4)
6882 .n(16)
6883 .k(k)
6884 .ks(3)
6885 .a_offset(23)
6886 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6887 }
6888 }
6889
6890 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, zero) {
6891 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006892 for (size_t k = 1; k <= 5; k += 2) {
6893 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006894 GemmMicrokernelTester()
6895 .mr(4)
6896 .nr(16)
6897 .kr(1)
6898 .sr(1)
6899 .m(4)
6900 .n(16)
6901 .k(k)
6902 .ks(3)
6903 .a_offset(23)
6904 .zero_index(mz)
6905 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6906 }
6907 }
6908 }
6909
6910 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, qmin) {
6911 TEST_REQUIRES_X86_AVX2;
6912 GemmMicrokernelTester()
6913 .mr(4)
6914 .nr(16)
6915 .kr(1)
6916 .sr(1)
6917 .m(4)
6918 .n(16)
6919 .k(1)
6920 .qmin(128)
6921 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6922 }
6923
6924 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, qmax) {
6925 TEST_REQUIRES_X86_AVX2;
6926 GemmMicrokernelTester()
6927 .mr(4)
6928 .nr(16)
6929 .kr(1)
6930 .sr(1)
6931 .m(4)
6932 .n(16)
6933 .k(1)
6934 .qmax(128)
6935 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6936 }
6937
6938 TEST(F16_IGEMM_MINMAX_4X16__AVX2_BROADCAST, strided_cm) {
6939 TEST_REQUIRES_X86_AVX2;
6940 GemmMicrokernelTester()
6941 .mr(4)
6942 .nr(16)
6943 .kr(1)
6944 .sr(1)
6945 .m(4)
6946 .n(16)
6947 .k(1)
6948 .cm_stride(19)
6949 .Test(xnn_f16_igemm_minmax_ukernel_4x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6950 }
6951#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6952
6953
6954#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6955 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, k_eq_1) {
6956 TEST_REQUIRES_X86_AVX2;
6957 GemmMicrokernelTester()
6958 .mr(5)
6959 .nr(16)
6960 .kr(1)
6961 .sr(1)
6962 .m(5)
6963 .n(16)
6964 .k(1)
6965 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6966 }
6967
6968 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, strided_cn) {
6969 TEST_REQUIRES_X86_AVX2;
6970 GemmMicrokernelTester()
6971 .mr(5)
6972 .nr(16)
6973 .kr(1)
6974 .sr(1)
6975 .m(5)
6976 .n(16)
6977 .k(1)
6978 .cn_stride(19)
6979 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6980 }
6981
6982 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, k_eq_1_subtile) {
6983 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006984 for (uint32_t n = 1; n <= 16; n++) {
6985 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08006986 GemmMicrokernelTester()
6987 .mr(5)
6988 .nr(16)
6989 .kr(1)
6990 .sr(1)
6991 .m(m)
6992 .n(n)
6993 .k(1)
6994 .iterations(1)
6995 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
6996 }
6997 }
6998 }
6999
7000 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, k_eq_1_subtile_m) {
7001 TEST_REQUIRES_X86_AVX2;
7002 for (uint32_t m = 1; m <= 5; m++) {
7003 GemmMicrokernelTester()
7004 .mr(5)
7005 .nr(16)
7006 .kr(1)
7007 .sr(1)
7008 .m(m)
7009 .n(16)
7010 .k(1)
7011 .iterations(1)
7012 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7013 }
7014 }
7015
7016 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, k_eq_1_subtile_n) {
7017 TEST_REQUIRES_X86_AVX2;
7018 for (uint32_t n = 1; n <= 16; n++) {
7019 GemmMicrokernelTester()
7020 .mr(5)
7021 .nr(16)
7022 .kr(1)
7023 .sr(1)
7024 .m(5)
7025 .n(n)
7026 .k(1)
7027 .iterations(1)
7028 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7029 }
7030 }
7031
7032 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, k_gt_1) {
7033 TEST_REQUIRES_X86_AVX2;
7034 for (size_t k = 2; k < 10; k++) {
7035 GemmMicrokernelTester()
7036 .mr(5)
7037 .nr(16)
7038 .kr(1)
7039 .sr(1)
7040 .m(5)
7041 .n(16)
7042 .k(k)
7043 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7044 }
7045 }
7046
7047 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, k_gt_1_subtile) {
7048 TEST_REQUIRES_X86_AVX2;
7049 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007050 for (uint32_t n = 1; n <= 16; n++) {
7051 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08007052 GemmMicrokernelTester()
7053 .mr(5)
7054 .nr(16)
7055 .kr(1)
7056 .sr(1)
7057 .m(m)
7058 .n(n)
7059 .k(k)
7060 .iterations(1)
7061 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7062 }
7063 }
7064 }
7065 }
7066
7067 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, n_gt_16) {
7068 TEST_REQUIRES_X86_AVX2;
7069 for (uint32_t n = 17; n < 32; n++) {
7070 for (size_t k = 1; k <= 5; k += 2) {
7071 GemmMicrokernelTester()
7072 .mr(5)
7073 .nr(16)
7074 .kr(1)
7075 .sr(1)
7076 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007077 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007078 .k(k)
7079 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7080 }
7081 }
7082 }
7083
7084 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, n_gt_16_strided_cn) {
7085 TEST_REQUIRES_X86_AVX2;
7086 for (uint32_t n = 17; n < 32; n++) {
7087 for (size_t k = 1; k <= 5; k += 2) {
7088 GemmMicrokernelTester()
7089 .mr(5)
7090 .nr(16)
7091 .kr(1)
7092 .sr(1)
7093 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007094 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007095 .k(k)
7096 .cn_stride(19)
7097 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7098 }
7099 }
7100 }
7101
7102 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, n_gt_16_subtile) {
7103 TEST_REQUIRES_X86_AVX2;
7104 for (uint32_t n = 17; n < 32; n++) {
7105 for (size_t k = 1; k <= 5; k += 2) {
7106 for (uint32_t m = 1; m <= 5; m++) {
7107 GemmMicrokernelTester()
7108 .mr(5)
7109 .nr(16)
7110 .kr(1)
7111 .sr(1)
7112 .m(m)
7113 .n(n)
7114 .k(k)
7115 .iterations(1)
7116 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7117 }
7118 }
7119 }
7120 }
7121
7122 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, n_div_16) {
7123 TEST_REQUIRES_X86_AVX2;
7124 for (uint32_t n = 32; n <= 48; n += 16) {
7125 for (size_t k = 1; k <= 5; k += 2) {
7126 GemmMicrokernelTester()
7127 .mr(5)
7128 .nr(16)
7129 .kr(1)
7130 .sr(1)
7131 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007132 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007133 .k(k)
7134 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7135 }
7136 }
7137 }
7138
7139 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, n_div_16_strided_cn) {
7140 TEST_REQUIRES_X86_AVX2;
7141 for (uint32_t n = 32; n <= 48; n += 16) {
7142 for (size_t k = 1; k <= 5; k += 2) {
7143 GemmMicrokernelTester()
7144 .mr(5)
7145 .nr(16)
7146 .kr(1)
7147 .sr(1)
7148 .m(5)
7149 .n(n)
7150 .k(k)
7151 .cn_stride(19)
7152 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7153 }
7154 }
7155 }
7156
7157 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, n_div_16_subtile) {
7158 TEST_REQUIRES_X86_AVX2;
7159 for (uint32_t n = 32; n <= 48; n += 16) {
7160 for (size_t k = 1; k <= 5; k += 2) {
7161 for (uint32_t m = 1; m <= 5; m++) {
7162 GemmMicrokernelTester()
7163 .mr(5)
7164 .nr(16)
7165 .kr(1)
7166 .sr(1)
7167 .m(m)
7168 .n(n)
7169 .k(k)
7170 .iterations(1)
7171 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7172 }
7173 }
7174 }
7175 }
7176
7177 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, small_kernel) {
7178 TEST_REQUIRES_X86_AVX2;
7179 for (size_t k = 1; k <= 5; k += 2) {
7180 GemmMicrokernelTester()
7181 .mr(5)
7182 .nr(16)
7183 .kr(1)
7184 .sr(1)
7185 .m(5)
7186 .n(16)
7187 .k(k)
7188 .ks(3)
7189 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7190 }
7191 }
7192
7193 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, small_kernel_subtile) {
7194 TEST_REQUIRES_X86_AVX2;
7195 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007196 for (uint32_t n = 1; n <= 16; n++) {
7197 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08007198 GemmMicrokernelTester()
7199 .mr(5)
7200 .nr(16)
7201 .kr(1)
7202 .sr(1)
7203 .m(m)
7204 .n(n)
7205 .k(k)
7206 .ks(3)
7207 .iterations(1)
7208 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7209 }
7210 }
7211 }
7212 }
7213
7214 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, n_gt_16_small_kernel) {
7215 TEST_REQUIRES_X86_AVX2;
7216 for (uint32_t n = 17; n < 32; n++) {
7217 for (size_t k = 1; k <= 5; k += 2) {
7218 GemmMicrokernelTester()
7219 .mr(5)
7220 .nr(16)
7221 .kr(1)
7222 .sr(1)
7223 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007224 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007225 .k(k)
7226 .ks(3)
7227 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7228 }
7229 }
7230 }
7231
7232 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, n_div_16_small_kernel) {
7233 TEST_REQUIRES_X86_AVX2;
7234 for (uint32_t n = 32; n <= 48; n += 16) {
7235 for (size_t k = 1; k <= 5; k += 2) {
7236 GemmMicrokernelTester()
7237 .mr(5)
7238 .nr(16)
7239 .kr(1)
7240 .sr(1)
7241 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007242 .n(n)
Marat Dukhanc4302c22022-01-06 19:27:03 -08007243 .k(k)
7244 .ks(3)
7245 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7246 }
7247 }
7248 }
7249
7250 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, strided_cm_subtile) {
7251 TEST_REQUIRES_X86_AVX2;
7252 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007253 for (uint32_t n = 1; n <= 16; n++) {
7254 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08007255 GemmMicrokernelTester()
7256 .mr(5)
7257 .nr(16)
7258 .kr(1)
7259 .sr(1)
7260 .m(m)
7261 .n(n)
7262 .k(k)
7263 .cm_stride(19)
7264 .iterations(1)
7265 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7266 }
7267 }
7268 }
7269 }
7270
7271 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, a_offset) {
7272 TEST_REQUIRES_X86_AVX2;
7273 for (size_t k = 1; k <= 5; k += 2) {
7274 GemmMicrokernelTester()
7275 .mr(5)
7276 .nr(16)
7277 .kr(1)
7278 .sr(1)
7279 .m(5)
7280 .n(16)
7281 .k(k)
7282 .ks(3)
7283 .a_offset(29)
7284 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7285 }
7286 }
7287
7288 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, zero) {
7289 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007290 for (size_t k = 1; k <= 5; k += 2) {
7291 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhanc4302c22022-01-06 19:27:03 -08007292 GemmMicrokernelTester()
7293 .mr(5)
7294 .nr(16)
7295 .kr(1)
7296 .sr(1)
7297 .m(5)
7298 .n(16)
7299 .k(k)
7300 .ks(3)
7301 .a_offset(29)
7302 .zero_index(mz)
7303 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7304 }
7305 }
7306 }
7307
7308 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, qmin) {
7309 TEST_REQUIRES_X86_AVX2;
7310 GemmMicrokernelTester()
7311 .mr(5)
7312 .nr(16)
7313 .kr(1)
7314 .sr(1)
7315 .m(5)
7316 .n(16)
7317 .k(1)
7318 .qmin(128)
7319 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7320 }
7321
7322 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, qmax) {
7323 TEST_REQUIRES_X86_AVX2;
7324 GemmMicrokernelTester()
7325 .mr(5)
7326 .nr(16)
7327 .kr(1)
7328 .sr(1)
7329 .m(5)
7330 .n(16)
7331 .k(1)
7332 .qmax(128)
7333 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7334 }
7335
7336 TEST(F16_IGEMM_MINMAX_5X16__AVX2_BROADCAST, strided_cm) {
7337 TEST_REQUIRES_X86_AVX2;
7338 GemmMicrokernelTester()
7339 .mr(5)
7340 .nr(16)
7341 .kr(1)
7342 .sr(1)
7343 .m(5)
7344 .n(16)
7345 .k(1)
7346 .cm_stride(19)
7347 .Test(xnn_f16_igemm_minmax_ukernel_5x16__avx2_broadcast, xnn_init_f16_scaleminmax_avx_params);
7348 }
7349#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64