blob: d49b85c203453aec2fd925d2cfba2805c052dbfe [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-igemm-minmax.yaml
//   Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
Frank Barchard447aa7b2021-12-28 14:11:40 -080016#include <xnnpack/allocator.h>
Marat Dukhan1c587112020-04-08 20:04:28 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Test suite for xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128.
// Every test configures GemmMicrokernelTester with mr = 6, nr = 8, kr = 1,
// sr = 1 and passes xnn_init_f32_minmax_scalar_params as the params
// initializer. The suite is compiled only when both XNN_ARCH_ARM64 and
// XNN_ENABLE_ASSEMBLY are set.
// NOTE: this file is generated (see the file header); lasting changes belong
// in the generator or its specification.

// m = mr, n = nr, k = 4.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
}

// m = mr, n = nr, k = 4 with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
}

// k = 4 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// k = 4, n = nr, m swept over 1..6; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 6; m++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}

// k = 4, m = mr, n swept over 1..8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}

// m = mr, n = nr, k swept over 1..3.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}

// k in 1..3 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n = nr, k swept over 5..7.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}

// k in 5..7 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n = nr, k = 8, 12, ..., 40.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}

// k = 8, 12, ..., 40 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n in 9..15, k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = mr, n in 9..15, k = 1, 6, 11, 16 with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// n in 9..15, k = 1, 6, 11, 16, m swept over 1..6; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n = 16 and 24, k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = mr, n = 16 and 24, k = 1, 6, 11, 16 with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// n = 16 and 24, k = 1, 6, 11, 16, m swept over 1..6; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n = nr, k = 1, 6, 11, 16 with ks = 3.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}

// ks = 3, k = 1, 6, 11, 16 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// ks = 3, m = mr, n in 9..15, k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// ks = 3, m = mr, n = 16 and 24, k = 1, 6, 11, 16.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// cm_stride = 11, k = 1, 6, 11, 16 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n = nr, k = 1, 6, 11, 16 with ks = 3 and a_offset = 127.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
  }
}

// Same setup as a_offset, additionally sweeping zero_index over 0..5.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 6; mz++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = mr, n = nr, k = 4 with qmin = 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(4)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
}

// m = mr, n = nr, k = 4 with qmax = 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(4)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
}

// m = mr, n = nr, k = 4 with cm_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
}
#endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
492
493
#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Test suite for xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64.
// Every test configures GemmMicrokernelTester with mr = 6, nr = 8, kr = 1,
// sr = 1 and passes xnn_init_f32_minmax_scalar_params as the params
// initializer. The suite is compiled only when both XNN_ARCH_ARM64 and
// XNN_ENABLE_ASSEMBLY are set.
// NOTE: this file is generated (see the file header); lasting changes belong
// in the generator or its specification.

// m = mr, n = nr, k = 2.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(2)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
}

// m = mr, n = nr, k = 2 with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(2)
    .cn_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
}

// k = 2 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(2)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// k = 2, n = nr, m swept over 1..6; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 6; m++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}

// k = 2, m = mr, n swept over 1..8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(n)
      .k(2)
      .iterations(1)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}

// m = mr, n = nr; the loop covers only k = 1.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 2; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}

// k = 1 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k < 2; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n = nr; the loop covers only k = 3.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 3; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}

// k = 3 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 3; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n = nr, k = 4, 6, ..., 20.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}

// k = 4, 6, ..., 20 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n in 9..15, k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = mr, n in 9..15, k = 1, 4, 7, 10 with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// n in 9..15, k = 1, 4, 7, 10, m swept over 1..6; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n = 16 and 24, k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = mr, n = 16 and 24, k = 1, 4, 7, 10 with cn_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// n = 16 and 24, k = 1, 4, 7, 10, m swept over 1..6; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n = nr, k = 1, 4, 7, 10 with ks = 3.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}

// ks = 3, k = 1, 4, 7, 10 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// ks = 3, m = mr, n in 9..15, k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// ks = 3, m = mr, n = 16 and 24, k = 1, 4, 7, 10.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// cm_stride = 11, k = 1, 4, 7, 10 for every (m, n) with 1 <= m <= 6 and 1 <= n <= 8; one iteration each.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
      }
    }
  }
}

// m = mr, n = nr, k = 1, 4, 7, 10 with ks = 3 and a_offset = 67.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
  }
}

// Same setup as a_offset, additionally sweeping zero_index over 0..5.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t mz = 0; mz < 6; mz++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(67)
        .zero_index(mz)
        .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
    }
  }
}

// m = mr, n = nr, k = 2 with qmin = 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(2)
    .qmin(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
}

// m = mr, n = nr, k = 2 with qmax = 128.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(2)
    .qmax(128)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
}

// m = mr, n = nr, k = 2 with cm_stride = 11.
TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(2)
    .cm_stride(11)
    .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
}
#endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
960
961
962#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -0700963 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700964 TEST_REQUIRES_ARM_NEON_FMA;
965 GemmMicrokernelTester()
966 .mr(1)
967 .nr(8)
968 .kr(1)
969 .sr(1)
970 .m(1)
971 .n(8)
972 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700973 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700974 }
975
Marat Dukhande06f492020-04-09 00:19:31 -0700976 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700977 TEST_REQUIRES_ARM_NEON_FMA;
978 GemmMicrokernelTester()
979 .mr(1)
980 .nr(8)
981 .kr(1)
982 .sr(1)
983 .m(1)
984 .n(8)
985 .k(8)
986 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700987 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -0700988 }
989
Marat Dukhande06f492020-04-09 00:19:31 -0700990 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700991 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800992 for (uint32_t n = 1; n <= 8; n++) {
993 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -0700994 GemmMicrokernelTester()
995 .mr(1)
996 .nr(8)
997 .kr(1)
998 .sr(1)
999 .m(m)
1000 .n(n)
1001 .k(8)
1002 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001003 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001004 }
1005 }
1006 }
1007
Marat Dukhande06f492020-04-09 00:19:31 -07001008 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001009 TEST_REQUIRES_ARM_NEON_FMA;
1010 for (uint32_t m = 1; m <= 1; m++) {
1011 GemmMicrokernelTester()
1012 .mr(1)
1013 .nr(8)
1014 .kr(1)
1015 .sr(1)
1016 .m(m)
1017 .n(8)
1018 .k(8)
1019 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001020 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001021 }
1022 }
1023
Marat Dukhande06f492020-04-09 00:19:31 -07001024 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001025 TEST_REQUIRES_ARM_NEON_FMA;
1026 for (uint32_t n = 1; n <= 8; n++) {
1027 GemmMicrokernelTester()
1028 .mr(1)
1029 .nr(8)
1030 .kr(1)
1031 .sr(1)
1032 .m(1)
1033 .n(n)
1034 .k(8)
1035 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001036 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001037 }
1038 }
1039
Marat Dukhande06f492020-04-09 00:19:31 -07001040 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001041 TEST_REQUIRES_ARM_NEON_FMA;
1042 GemmMicrokernelTester()
1043 .mr(1)
1044 .nr(8)
1045 .kr(1)
1046 .sr(1)
1047 .m(1)
1048 .n(8)
1049 .k(16)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001050 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001051 }
1052
Marat Dukhande06f492020-04-09 00:19:31 -07001053 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001054 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001055 for (uint32_t n = 1; n <= 8; n++) {
1056 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001057 GemmMicrokernelTester()
1058 .mr(1)
1059 .nr(8)
1060 .kr(1)
1061 .sr(1)
1062 .m(m)
1063 .n(n)
1064 .k(16)
1065 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001066 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001067 }
1068 }
1069 }
1070
Marat Dukhande06f492020-04-09 00:19:31 -07001071 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001072 TEST_REQUIRES_ARM_NEON_FMA;
1073 for (size_t k = 1; k < 16; k++) {
1074 GemmMicrokernelTester()
1075 .mr(1)
1076 .nr(8)
1077 .kr(1)
1078 .sr(1)
1079 .m(1)
1080 .n(8)
1081 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001082 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001083 }
1084 }
1085
Marat Dukhande06f492020-04-09 00:19:31 -07001086 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001087 TEST_REQUIRES_ARM_NEON_FMA;
1088 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001089 for (uint32_t n = 1; n <= 8; n++) {
1090 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001091 GemmMicrokernelTester()
1092 .mr(1)
1093 .nr(8)
1094 .kr(1)
1095 .sr(1)
1096 .m(m)
1097 .n(n)
1098 .k(k)
1099 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001100 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001101 }
1102 }
1103 }
1104 }
1105
Marat Dukhande06f492020-04-09 00:19:31 -07001106 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001107 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001108 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001109 GemmMicrokernelTester()
1110 .mr(1)
1111 .nr(8)
1112 .kr(1)
1113 .sr(1)
1114 .m(1)
1115 .n(8)
1116 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001117 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001118 }
1119 }
1120
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001121 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001122 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001123 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001124 for (uint32_t n = 1; n <= 8; n++) {
1125 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001126 GemmMicrokernelTester()
1127 .mr(1)
1128 .nr(8)
1129 .kr(1)
1130 .sr(1)
1131 .m(m)
1132 .n(n)
1133 .k(k)
1134 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001135 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001136 }
1137 }
1138 }
1139 }
1140
Marat Dukhande06f492020-04-09 00:19:31 -07001141 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001142 TEST_REQUIRES_ARM_NEON_FMA;
1143 for (size_t k = 24; k <= 80; k += 8) {
1144 GemmMicrokernelTester()
1145 .mr(1)
1146 .nr(8)
1147 .kr(1)
1148 .sr(1)
1149 .m(1)
1150 .n(8)
1151 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001152 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001153 }
1154 }
1155
Marat Dukhande06f492020-04-09 00:19:31 -07001156 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001157 TEST_REQUIRES_ARM_NEON_FMA;
1158 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001159 for (uint32_t n = 1; n <= 8; n++) {
1160 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001161 GemmMicrokernelTester()
1162 .mr(1)
1163 .nr(8)
1164 .kr(1)
1165 .sr(1)
1166 .m(m)
1167 .n(n)
1168 .k(k)
1169 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001170 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001171 }
1172 }
1173 }
1174 }
1175
Marat Dukhande06f492020-04-09 00:19:31 -07001176 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001177 TEST_REQUIRES_ARM_NEON_FMA;
1178 for (uint32_t n = 9; n < 16; n++) {
1179 for (size_t k = 1; k <= 40; k += 9) {
1180 GemmMicrokernelTester()
1181 .mr(1)
1182 .nr(8)
1183 .kr(1)
1184 .sr(1)
1185 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001186 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001187 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001188 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001189 }
1190 }
1191 }
1192
Marat Dukhande06f492020-04-09 00:19:31 -07001193 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001194 TEST_REQUIRES_ARM_NEON_FMA;
1195 for (uint32_t n = 9; n < 16; n++) {
1196 for (size_t k = 1; k <= 40; k += 9) {
1197 GemmMicrokernelTester()
1198 .mr(1)
1199 .nr(8)
1200 .kr(1)
1201 .sr(1)
1202 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001203 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001204 .k(k)
1205 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001206 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001207 }
1208 }
1209 }
1210
Marat Dukhande06f492020-04-09 00:19:31 -07001211 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001212 TEST_REQUIRES_ARM_NEON_FMA;
1213 for (uint32_t n = 9; n < 16; n++) {
1214 for (size_t k = 1; k <= 40; k += 9) {
1215 for (uint32_t m = 1; m <= 1; m++) {
1216 GemmMicrokernelTester()
1217 .mr(1)
1218 .nr(8)
1219 .kr(1)
1220 .sr(1)
1221 .m(m)
1222 .n(n)
1223 .k(k)
1224 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001225 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001226 }
1227 }
1228 }
1229 }
1230
Marat Dukhande06f492020-04-09 00:19:31 -07001231 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001232 TEST_REQUIRES_ARM_NEON_FMA;
1233 for (uint32_t n = 16; n <= 24; n += 8) {
1234 for (size_t k = 1; k <= 40; k += 9) {
1235 GemmMicrokernelTester()
1236 .mr(1)
1237 .nr(8)
1238 .kr(1)
1239 .sr(1)
1240 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001241 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001242 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001243 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001244 }
1245 }
1246 }
1247
Marat Dukhande06f492020-04-09 00:19:31 -07001248 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001249 TEST_REQUIRES_ARM_NEON_FMA;
1250 for (uint32_t n = 16; n <= 24; n += 8) {
1251 for (size_t k = 1; k <= 40; k += 9) {
1252 GemmMicrokernelTester()
1253 .mr(1)
1254 .nr(8)
1255 .kr(1)
1256 .sr(1)
1257 .m(1)
1258 .n(n)
1259 .k(k)
1260 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001261 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001262 }
1263 }
1264 }
1265
Marat Dukhande06f492020-04-09 00:19:31 -07001266 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001267 TEST_REQUIRES_ARM_NEON_FMA;
1268 for (uint32_t n = 16; n <= 24; n += 8) {
1269 for (size_t k = 1; k <= 40; k += 9) {
1270 for (uint32_t m = 1; m <= 1; m++) {
1271 GemmMicrokernelTester()
1272 .mr(1)
1273 .nr(8)
1274 .kr(1)
1275 .sr(1)
1276 .m(m)
1277 .n(n)
1278 .k(k)
1279 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001280 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001281 }
1282 }
1283 }
1284 }
1285
Marat Dukhande06f492020-04-09 00:19:31 -07001286 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001287 TEST_REQUIRES_ARM_NEON_FMA;
1288 for (size_t k = 1; k <= 40; k += 9) {
1289 GemmMicrokernelTester()
1290 .mr(1)
1291 .nr(8)
1292 .kr(1)
1293 .sr(1)
1294 .m(1)
1295 .n(8)
1296 .k(k)
1297 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001298 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001299 }
1300 }
1301
Marat Dukhande06f492020-04-09 00:19:31 -07001302 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001303 TEST_REQUIRES_ARM_NEON_FMA;
1304 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001305 for (uint32_t n = 1; n <= 8; n++) {
1306 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001307 GemmMicrokernelTester()
1308 .mr(1)
1309 .nr(8)
1310 .kr(1)
1311 .sr(1)
1312 .m(m)
1313 .n(n)
1314 .k(k)
1315 .ks(3)
1316 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001317 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001318 }
1319 }
1320 }
1321 }
1322
Marat Dukhande06f492020-04-09 00:19:31 -07001323 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001324 TEST_REQUIRES_ARM_NEON_FMA;
1325 for (uint32_t n = 9; n < 16; n++) {
1326 for (size_t k = 1; k <= 40; k += 9) {
1327 GemmMicrokernelTester()
1328 .mr(1)
1329 .nr(8)
1330 .kr(1)
1331 .sr(1)
1332 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001333 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001334 .k(k)
1335 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001336 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001337 }
1338 }
1339 }
1340
Marat Dukhande06f492020-04-09 00:19:31 -07001341 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001342 TEST_REQUIRES_ARM_NEON_FMA;
1343 for (uint32_t n = 16; n <= 24; n += 8) {
1344 for (size_t k = 1; k <= 40; k += 9) {
1345 GemmMicrokernelTester()
1346 .mr(1)
1347 .nr(8)
1348 .kr(1)
1349 .sr(1)
1350 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001351 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001352 .k(k)
1353 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001354 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001355 }
1356 }
1357 }
1358
Marat Dukhande06f492020-04-09 00:19:31 -07001359 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001360 TEST_REQUIRES_ARM_NEON_FMA;
1361 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001362 for (uint32_t n = 1; n <= 8; n++) {
1363 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001364 GemmMicrokernelTester()
1365 .mr(1)
1366 .nr(8)
1367 .kr(1)
1368 .sr(1)
1369 .m(m)
1370 .n(n)
1371 .k(k)
1372 .cm_stride(11)
1373 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001374 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001375 }
1376 }
1377 }
1378 }
1379
Marat Dukhande06f492020-04-09 00:19:31 -07001380 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001381 TEST_REQUIRES_ARM_NEON_FMA;
1382 for (size_t k = 1; k <= 40; k += 9) {
1383 GemmMicrokernelTester()
1384 .mr(1)
1385 .nr(8)
1386 .kr(1)
1387 .sr(1)
1388 .m(1)
1389 .n(8)
1390 .k(k)
1391 .ks(3)
1392 .a_offset(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001393 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001394 }
1395 }
1396
Marat Dukhande06f492020-04-09 00:19:31 -07001397 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001398 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001399 for (size_t k = 1; k <= 40; k += 9) {
1400 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001401 GemmMicrokernelTester()
1402 .mr(1)
1403 .nr(8)
1404 .kr(1)
1405 .sr(1)
1406 .m(1)
1407 .n(8)
1408 .k(k)
1409 .ks(3)
1410 .a_offset(43)
1411 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001412 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001413 }
1414 }
1415 }
1416
Marat Dukhande06f492020-04-09 00:19:31 -07001417 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001418 TEST_REQUIRES_ARM_NEON_FMA;
1419 GemmMicrokernelTester()
1420 .mr(1)
1421 .nr(8)
1422 .kr(1)
1423 .sr(1)
1424 .m(1)
1425 .n(8)
1426 .k(8)
1427 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001428 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001429 }
1430
Marat Dukhande06f492020-04-09 00:19:31 -07001431 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001432 TEST_REQUIRES_ARM_NEON_FMA;
1433 GemmMicrokernelTester()
1434 .mr(1)
1435 .nr(8)
1436 .kr(1)
1437 .sr(1)
1438 .m(1)
1439 .n(8)
1440 .k(8)
1441 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001442 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001443 }
1444
Marat Dukhande06f492020-04-09 00:19:31 -07001445 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001446 TEST_REQUIRES_ARM_NEON_FMA;
1447 GemmMicrokernelTester()
1448 .mr(1)
1449 .nr(8)
1450 .kr(1)
1451 .sr(1)
1452 .m(1)
1453 .n(8)
1454 .k(8)
1455 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001456 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001457 }
1458#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1459
1460
1461#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07001462 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001463 TEST_REQUIRES_ARM_NEON_FMA;
1464 GemmMicrokernelTester()
1465 .mr(4)
1466 .nr(8)
1467 .kr(1)
1468 .sr(1)
1469 .m(4)
1470 .n(8)
1471 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001472 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001473 }
1474
Marat Dukhande06f492020-04-09 00:19:31 -07001475 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001476 TEST_REQUIRES_ARM_NEON_FMA;
1477 GemmMicrokernelTester()
1478 .mr(4)
1479 .nr(8)
1480 .kr(1)
1481 .sr(1)
1482 .m(4)
1483 .n(8)
1484 .k(4)
1485 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001486 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001487 }
1488
Marat Dukhande06f492020-04-09 00:19:31 -07001489 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001490 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001491 for (uint32_t n = 1; n <= 8; n++) {
1492 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001493 GemmMicrokernelTester()
1494 .mr(4)
1495 .nr(8)
1496 .kr(1)
1497 .sr(1)
1498 .m(m)
1499 .n(n)
1500 .k(4)
1501 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001502 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001503 }
1504 }
1505 }
1506
Marat Dukhande06f492020-04-09 00:19:31 -07001507 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001508 TEST_REQUIRES_ARM_NEON_FMA;
1509 for (uint32_t m = 1; m <= 4; m++) {
1510 GemmMicrokernelTester()
1511 .mr(4)
1512 .nr(8)
1513 .kr(1)
1514 .sr(1)
1515 .m(m)
1516 .n(8)
1517 .k(4)
1518 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001519 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001520 }
1521 }
1522
Marat Dukhande06f492020-04-09 00:19:31 -07001523 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001524 TEST_REQUIRES_ARM_NEON_FMA;
1525 for (uint32_t n = 1; n <= 8; n++) {
1526 GemmMicrokernelTester()
1527 .mr(4)
1528 .nr(8)
1529 .kr(1)
1530 .sr(1)
1531 .m(4)
1532 .n(n)
1533 .k(4)
1534 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001535 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001536 }
1537 }
1538
Marat Dukhande06f492020-04-09 00:19:31 -07001539 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001540 TEST_REQUIRES_ARM_NEON_FMA;
1541 GemmMicrokernelTester()
1542 .mr(4)
1543 .nr(8)
1544 .kr(1)
1545 .sr(1)
1546 .m(4)
1547 .n(8)
1548 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001549 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001550 }
1551
Marat Dukhande06f492020-04-09 00:19:31 -07001552 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001553 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001554 for (uint32_t n = 1; n <= 8; n++) {
1555 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001556 GemmMicrokernelTester()
1557 .mr(4)
1558 .nr(8)
1559 .kr(1)
1560 .sr(1)
1561 .m(m)
1562 .n(n)
1563 .k(8)
1564 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001565 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001566 }
1567 }
1568 }
1569
Marat Dukhande06f492020-04-09 00:19:31 -07001570 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001571 TEST_REQUIRES_ARM_NEON_FMA;
1572 for (size_t k = 1; k < 8; k++) {
1573 GemmMicrokernelTester()
1574 .mr(4)
1575 .nr(8)
1576 .kr(1)
1577 .sr(1)
1578 .m(4)
1579 .n(8)
1580 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001581 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001582 }
1583 }
1584
Marat Dukhande06f492020-04-09 00:19:31 -07001585 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001586 TEST_REQUIRES_ARM_NEON_FMA;
1587 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001588 for (uint32_t n = 1; n <= 8; n++) {
1589 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001590 GemmMicrokernelTester()
1591 .mr(4)
1592 .nr(8)
1593 .kr(1)
1594 .sr(1)
1595 .m(m)
1596 .n(n)
1597 .k(k)
1598 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001599 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001600 }
1601 }
1602 }
1603 }
1604
Marat Dukhande06f492020-04-09 00:19:31 -07001605 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001606 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001607 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001608 GemmMicrokernelTester()
1609 .mr(4)
1610 .nr(8)
1611 .kr(1)
1612 .sr(1)
1613 .m(4)
1614 .n(8)
1615 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001616 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001617 }
1618 }
1619
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001620 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001621 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08001622 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001623 for (uint32_t n = 1; n <= 8; n++) {
1624 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001625 GemmMicrokernelTester()
1626 .mr(4)
1627 .nr(8)
1628 .kr(1)
1629 .sr(1)
1630 .m(m)
1631 .n(n)
1632 .k(k)
1633 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001634 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001635 }
1636 }
1637 }
1638 }
1639
Marat Dukhande06f492020-04-09 00:19:31 -07001640 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001641 TEST_REQUIRES_ARM_NEON_FMA;
1642 for (size_t k = 12; k <= 40; k += 4) {
1643 GemmMicrokernelTester()
1644 .mr(4)
1645 .nr(8)
1646 .kr(1)
1647 .sr(1)
1648 .m(4)
1649 .n(8)
1650 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001651 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001652 }
1653 }
1654
Marat Dukhande06f492020-04-09 00:19:31 -07001655 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001656 TEST_REQUIRES_ARM_NEON_FMA;
1657 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001658 for (uint32_t n = 1; n <= 8; n++) {
1659 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001660 GemmMicrokernelTester()
1661 .mr(4)
1662 .nr(8)
1663 .kr(1)
1664 .sr(1)
1665 .m(m)
1666 .n(n)
1667 .k(k)
1668 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001669 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001670 }
1671 }
1672 }
1673 }
1674
Marat Dukhande06f492020-04-09 00:19:31 -07001675 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001676 TEST_REQUIRES_ARM_NEON_FMA;
1677 for (uint32_t n = 9; n < 16; n++) {
1678 for (size_t k = 1; k <= 20; k += 5) {
1679 GemmMicrokernelTester()
1680 .mr(4)
1681 .nr(8)
1682 .kr(1)
1683 .sr(1)
1684 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001685 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001686 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001687 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001688 }
1689 }
1690 }
1691
Marat Dukhande06f492020-04-09 00:19:31 -07001692 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001693 TEST_REQUIRES_ARM_NEON_FMA;
1694 for (uint32_t n = 9; n < 16; n++) {
1695 for (size_t k = 1; k <= 20; k += 5) {
1696 GemmMicrokernelTester()
1697 .mr(4)
1698 .nr(8)
1699 .kr(1)
1700 .sr(1)
1701 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001702 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001703 .k(k)
1704 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001705 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001706 }
1707 }
1708 }
1709
Marat Dukhande06f492020-04-09 00:19:31 -07001710 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001711 TEST_REQUIRES_ARM_NEON_FMA;
1712 for (uint32_t n = 9; n < 16; n++) {
1713 for (size_t k = 1; k <= 20; k += 5) {
1714 for (uint32_t m = 1; m <= 4; m++) {
1715 GemmMicrokernelTester()
1716 .mr(4)
1717 .nr(8)
1718 .kr(1)
1719 .sr(1)
1720 .m(m)
1721 .n(n)
1722 .k(k)
1723 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001724 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001725 }
1726 }
1727 }
1728 }
1729
Marat Dukhande06f492020-04-09 00:19:31 -07001730 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001731 TEST_REQUIRES_ARM_NEON_FMA;
1732 for (uint32_t n = 16; n <= 24; n += 8) {
1733 for (size_t k = 1; k <= 20; k += 5) {
1734 GemmMicrokernelTester()
1735 .mr(4)
1736 .nr(8)
1737 .kr(1)
1738 .sr(1)
1739 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001740 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001741 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001742 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001743 }
1744 }
1745 }
1746
Marat Dukhande06f492020-04-09 00:19:31 -07001747 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001748 TEST_REQUIRES_ARM_NEON_FMA;
1749 for (uint32_t n = 16; n <= 24; n += 8) {
1750 for (size_t k = 1; k <= 20; k += 5) {
1751 GemmMicrokernelTester()
1752 .mr(4)
1753 .nr(8)
1754 .kr(1)
1755 .sr(1)
1756 .m(4)
1757 .n(n)
1758 .k(k)
1759 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001760 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001761 }
1762 }
1763 }
1764
Marat Dukhande06f492020-04-09 00:19:31 -07001765 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001766 TEST_REQUIRES_ARM_NEON_FMA;
1767 for (uint32_t n = 16; n <= 24; n += 8) {
1768 for (size_t k = 1; k <= 20; k += 5) {
1769 for (uint32_t m = 1; m <= 4; m++) {
1770 GemmMicrokernelTester()
1771 .mr(4)
1772 .nr(8)
1773 .kr(1)
1774 .sr(1)
1775 .m(m)
1776 .n(n)
1777 .k(k)
1778 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001779 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001780 }
1781 }
1782 }
1783 }
1784
Marat Dukhande06f492020-04-09 00:19:31 -07001785 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001786 TEST_REQUIRES_ARM_NEON_FMA;
1787 for (size_t k = 1; k <= 20; k += 5) {
1788 GemmMicrokernelTester()
1789 .mr(4)
1790 .nr(8)
1791 .kr(1)
1792 .sr(1)
1793 .m(4)
1794 .n(8)
1795 .k(k)
1796 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001797 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001798 }
1799 }
1800
Marat Dukhande06f492020-04-09 00:19:31 -07001801 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001802 TEST_REQUIRES_ARM_NEON_FMA;
1803 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001804 for (uint32_t n = 1; n <= 8; n++) {
1805 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001806 GemmMicrokernelTester()
1807 .mr(4)
1808 .nr(8)
1809 .kr(1)
1810 .sr(1)
1811 .m(m)
1812 .n(n)
1813 .k(k)
1814 .ks(3)
1815 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001816 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001817 }
1818 }
1819 }
1820 }
1821
Marat Dukhande06f492020-04-09 00:19:31 -07001822 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001823 TEST_REQUIRES_ARM_NEON_FMA;
1824 for (uint32_t n = 9; n < 16; n++) {
1825 for (size_t k = 1; k <= 20; k += 5) {
1826 GemmMicrokernelTester()
1827 .mr(4)
1828 .nr(8)
1829 .kr(1)
1830 .sr(1)
1831 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001832 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001833 .k(k)
1834 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001835 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001836 }
1837 }
1838 }
1839
Marat Dukhande06f492020-04-09 00:19:31 -07001840 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001841 TEST_REQUIRES_ARM_NEON_FMA;
1842 for (uint32_t n = 16; n <= 24; n += 8) {
1843 for (size_t k = 1; k <= 20; k += 5) {
1844 GemmMicrokernelTester()
1845 .mr(4)
1846 .nr(8)
1847 .kr(1)
1848 .sr(1)
1849 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001850 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07001851 .k(k)
1852 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001853 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001854 }
1855 }
1856 }
1857
Marat Dukhande06f492020-04-09 00:19:31 -07001858 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001859 TEST_REQUIRES_ARM_NEON_FMA;
1860 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001861 for (uint32_t n = 1; n <= 8; n++) {
1862 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001863 GemmMicrokernelTester()
1864 .mr(4)
1865 .nr(8)
1866 .kr(1)
1867 .sr(1)
1868 .m(m)
1869 .n(n)
1870 .k(k)
1871 .cm_stride(11)
1872 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001873 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001874 }
1875 }
1876 }
1877 }
1878
Marat Dukhande06f492020-04-09 00:19:31 -07001879 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001880 TEST_REQUIRES_ARM_NEON_FMA;
1881 for (size_t k = 1; k <= 20; k += 5) {
1882 GemmMicrokernelTester()
1883 .mr(4)
1884 .nr(8)
1885 .kr(1)
1886 .sr(1)
1887 .m(4)
1888 .n(8)
1889 .k(k)
1890 .ks(3)
1891 .a_offset(83)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001892 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001893 }
1894 }
1895
Marat Dukhande06f492020-04-09 00:19:31 -07001896 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001897 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001898 for (size_t k = 1; k <= 20; k += 5) {
1899 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001900 GemmMicrokernelTester()
1901 .mr(4)
1902 .nr(8)
1903 .kr(1)
1904 .sr(1)
1905 .m(4)
1906 .n(8)
1907 .k(k)
1908 .ks(3)
1909 .a_offset(83)
1910 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001911 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001912 }
1913 }
1914 }
1915
Marat Dukhande06f492020-04-09 00:19:31 -07001916 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001917 TEST_REQUIRES_ARM_NEON_FMA;
1918 GemmMicrokernelTester()
1919 .mr(4)
1920 .nr(8)
1921 .kr(1)
1922 .sr(1)
1923 .m(4)
1924 .n(8)
1925 .k(4)
1926 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001927 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001928 }
1929
Marat Dukhande06f492020-04-09 00:19:31 -07001930 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001931 TEST_REQUIRES_ARM_NEON_FMA;
1932 GemmMicrokernelTester()
1933 .mr(4)
1934 .nr(8)
1935 .kr(1)
1936 .sr(1)
1937 .m(4)
1938 .n(8)
1939 .k(4)
1940 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001941 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001942 }
1943
Marat Dukhande06f492020-04-09 00:19:31 -07001944 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07001945 TEST_REQUIRES_ARM_NEON_FMA;
1946 GemmMicrokernelTester()
1947 .mr(4)
1948 .nr(8)
1949 .kr(1)
1950 .sr(1)
1951 .m(4)
1952 .n(8)
1953 .k(4)
1954 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001955 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07001956 }
1957#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1958
1959
Marat Dukhanf6068062020-05-17 04:42:19 -07001960#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Frank Barchard490febe2020-07-16 18:42:17 -07001961 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2) {
Frank Barchard569561d2020-06-17 13:11:12 -07001962 TEST_REQUIRES_ARM_NEON;
1963 GemmMicrokernelTester()
1964 .mr(4)
1965 .nr(8)
1966 .kr(1)
1967 .sr(1)
1968 .m(4)
1969 .n(8)
1970 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001971 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07001972 }
1973
Frank Barchard490febe2020-07-16 18:42:17 -07001974 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, strided_cn) {
Frank Barchard569561d2020-06-17 13:11:12 -07001975 TEST_REQUIRES_ARM_NEON;
1976 GemmMicrokernelTester()
1977 .mr(4)
1978 .nr(8)
1979 .kr(1)
1980 .sr(1)
1981 .m(4)
1982 .n(8)
1983 .k(2)
1984 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07001985 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07001986 }
1987
Frank Barchard490febe2020-07-16 18:42:17 -07001988 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile) {
Frank Barchard569561d2020-06-17 13:11:12 -07001989 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001990 for (uint32_t n = 1; n <= 8; n++) {
1991 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard569561d2020-06-17 13:11:12 -07001992 GemmMicrokernelTester()
1993 .mr(4)
1994 .nr(8)
1995 .kr(1)
1996 .sr(1)
1997 .m(m)
1998 .n(n)
1999 .k(2)
2000 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002001 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002002 }
2003 }
2004 }
2005
Frank Barchard490febe2020-07-16 18:42:17 -07002006 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile_m) {
Frank Barchard569561d2020-06-17 13:11:12 -07002007 TEST_REQUIRES_ARM_NEON;
2008 for (uint32_t m = 1; m <= 4; m++) {
2009 GemmMicrokernelTester()
2010 .mr(4)
2011 .nr(8)
2012 .kr(1)
2013 .sr(1)
2014 .m(m)
2015 .n(8)
2016 .k(2)
2017 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002018 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002019 }
2020 }
2021
Frank Barchard490febe2020-07-16 18:42:17 -07002022 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile_n) {
Frank Barchard569561d2020-06-17 13:11:12 -07002023 TEST_REQUIRES_ARM_NEON;
2024 for (uint32_t n = 1; n <= 8; n++) {
2025 GemmMicrokernelTester()
2026 .mr(4)
2027 .nr(8)
2028 .kr(1)
2029 .sr(1)
2030 .m(4)
2031 .n(n)
2032 .k(2)
2033 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002034 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002035 }
2036 }
2037
Frank Barchard490febe2020-07-16 18:42:17 -07002038 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_lt_2) {
Frank Barchard569561d2020-06-17 13:11:12 -07002039 TEST_REQUIRES_ARM_NEON;
2040 for (size_t k = 1; k < 2; k++) {
2041 GemmMicrokernelTester()
2042 .mr(4)
2043 .nr(8)
2044 .kr(1)
2045 .sr(1)
2046 .m(4)
2047 .n(8)
2048 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002049 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002050 }
2051 }
2052
Frank Barchard490febe2020-07-16 18:42:17 -07002053 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_lt_2_subtile) {
Frank Barchard569561d2020-06-17 13:11:12 -07002054 TEST_REQUIRES_ARM_NEON;
2055 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002056 for (uint32_t n = 1; n <= 8; n++) {
2057 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard569561d2020-06-17 13:11:12 -07002058 GemmMicrokernelTester()
2059 .mr(4)
2060 .nr(8)
2061 .kr(1)
2062 .sr(1)
2063 .m(m)
2064 .n(n)
2065 .k(k)
2066 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002067 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002068 }
2069 }
2070 }
2071 }
2072
Frank Barchard490febe2020-07-16 18:42:17 -07002073 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_gt_2) {
Frank Barchard569561d2020-06-17 13:11:12 -07002074 TEST_REQUIRES_ARM_NEON;
2075 for (size_t k = 3; k < 4; k++) {
2076 GemmMicrokernelTester()
2077 .mr(4)
2078 .nr(8)
2079 .kr(1)
2080 .sr(1)
2081 .m(4)
2082 .n(8)
2083 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002084 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002085 }
2086 }
2087
Frank Barchard490febe2020-07-16 18:42:17 -07002088 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_gt_2_subtile) {
Frank Barchard569561d2020-06-17 13:11:12 -07002089 TEST_REQUIRES_ARM_NEON;
2090 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002091 for (uint32_t n = 1; n <= 8; n++) {
2092 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard569561d2020-06-17 13:11:12 -07002093 GemmMicrokernelTester()
2094 .mr(4)
2095 .nr(8)
2096 .kr(1)
2097 .sr(1)
2098 .m(m)
2099 .n(n)
2100 .k(k)
2101 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002102 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002103 }
2104 }
2105 }
2106 }
2107
Frank Barchard490febe2020-07-16 18:42:17 -07002108 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_div_2) {
Frank Barchard569561d2020-06-17 13:11:12 -07002109 TEST_REQUIRES_ARM_NEON;
2110 for (size_t k = 4; k <= 20; k += 2) {
2111 GemmMicrokernelTester()
2112 .mr(4)
2113 .nr(8)
2114 .kr(1)
2115 .sr(1)
2116 .m(4)
2117 .n(8)
2118 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002119 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002120 }
2121 }
2122
Frank Barchard490febe2020-07-16 18:42:17 -07002123 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, k_div_2_subtile) {
Frank Barchard569561d2020-06-17 13:11:12 -07002124 TEST_REQUIRES_ARM_NEON;
2125 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002126 for (uint32_t n = 1; n <= 8; n++) {
2127 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard569561d2020-06-17 13:11:12 -07002128 GemmMicrokernelTester()
2129 .mr(4)
2130 .nr(8)
2131 .kr(1)
2132 .sr(1)
2133 .m(m)
2134 .n(n)
2135 .k(k)
2136 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002137 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002138 }
2139 }
2140 }
2141 }
2142
Frank Barchard490febe2020-07-16 18:42:17 -07002143 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8) {
Frank Barchard569561d2020-06-17 13:11:12 -07002144 TEST_REQUIRES_ARM_NEON;
2145 for (uint32_t n = 9; n < 16; n++) {
2146 for (size_t k = 1; k <= 10; k += 3) {
2147 GemmMicrokernelTester()
2148 .mr(4)
2149 .nr(8)
2150 .kr(1)
2151 .sr(1)
2152 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002153 .n(n)
Frank Barchard569561d2020-06-17 13:11:12 -07002154 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002155 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002156 }
2157 }
2158 }
2159
Frank Barchard490febe2020-07-16 18:42:17 -07002160 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_strided_cn) {
Frank Barchard569561d2020-06-17 13:11:12 -07002161 TEST_REQUIRES_ARM_NEON;
2162 for (uint32_t n = 9; n < 16; n++) {
2163 for (size_t k = 1; k <= 10; k += 3) {
2164 GemmMicrokernelTester()
2165 .mr(4)
2166 .nr(8)
2167 .kr(1)
2168 .sr(1)
2169 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002170 .n(n)
Frank Barchard569561d2020-06-17 13:11:12 -07002171 .k(k)
2172 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002173 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002174 }
2175 }
2176 }
2177
Frank Barchard490febe2020-07-16 18:42:17 -07002178 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_subtile) {
Frank Barchard569561d2020-06-17 13:11:12 -07002179 TEST_REQUIRES_ARM_NEON;
2180 for (uint32_t n = 9; n < 16; n++) {
2181 for (size_t k = 1; k <= 10; k += 3) {
2182 for (uint32_t m = 1; m <= 4; m++) {
2183 GemmMicrokernelTester()
2184 .mr(4)
2185 .nr(8)
2186 .kr(1)
2187 .sr(1)
2188 .m(m)
2189 .n(n)
2190 .k(k)
2191 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002192 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002193 }
2194 }
2195 }
2196 }
2197
Frank Barchard490febe2020-07-16 18:42:17 -07002198 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_div_8) {
Frank Barchard569561d2020-06-17 13:11:12 -07002199 TEST_REQUIRES_ARM_NEON;
2200 for (uint32_t n = 16; n <= 24; n += 8) {
2201 for (size_t k = 1; k <= 10; k += 3) {
2202 GemmMicrokernelTester()
2203 .mr(4)
2204 .nr(8)
2205 .kr(1)
2206 .sr(1)
2207 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002208 .n(n)
Frank Barchard569561d2020-06-17 13:11:12 -07002209 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002210 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002211 }
2212 }
2213 }
2214
Frank Barchard490febe2020-07-16 18:42:17 -07002215 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_strided_cn) {
Frank Barchard569561d2020-06-17 13:11:12 -07002216 TEST_REQUIRES_ARM_NEON;
2217 for (uint32_t n = 16; n <= 24; n += 8) {
2218 for (size_t k = 1; k <= 10; k += 3) {
2219 GemmMicrokernelTester()
2220 .mr(4)
2221 .nr(8)
2222 .kr(1)
2223 .sr(1)
2224 .m(4)
2225 .n(n)
2226 .k(k)
2227 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002228 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002229 }
2230 }
2231 }
2232
Frank Barchard490febe2020-07-16 18:42:17 -07002233 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_subtile) {
Frank Barchard569561d2020-06-17 13:11:12 -07002234 TEST_REQUIRES_ARM_NEON;
2235 for (uint32_t n = 16; n <= 24; n += 8) {
2236 for (size_t k = 1; k <= 10; k += 3) {
2237 for (uint32_t m = 1; m <= 4; m++) {
2238 GemmMicrokernelTester()
2239 .mr(4)
2240 .nr(8)
2241 .kr(1)
2242 .sr(1)
2243 .m(m)
2244 .n(n)
2245 .k(k)
2246 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002247 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002248 }
2249 }
2250 }
2251 }
2252
Frank Barchard490febe2020-07-16 18:42:17 -07002253 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, small_kernel) {
Frank Barchard569561d2020-06-17 13:11:12 -07002254 TEST_REQUIRES_ARM_NEON;
2255 for (size_t k = 1; k <= 10; k += 3) {
2256 GemmMicrokernelTester()
2257 .mr(4)
2258 .nr(8)
2259 .kr(1)
2260 .sr(1)
2261 .m(4)
2262 .n(8)
2263 .k(k)
2264 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002265 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002266 }
2267 }
2268
Frank Barchard490febe2020-07-16 18:42:17 -07002269 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, small_kernel_subtile) {
Frank Barchard569561d2020-06-17 13:11:12 -07002270 TEST_REQUIRES_ARM_NEON;
2271 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002272 for (uint32_t n = 1; n <= 8; n++) {
2273 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard569561d2020-06-17 13:11:12 -07002274 GemmMicrokernelTester()
2275 .mr(4)
2276 .nr(8)
2277 .kr(1)
2278 .sr(1)
2279 .m(m)
2280 .n(n)
2281 .k(k)
2282 .ks(3)
2283 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002284 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002285 }
2286 }
2287 }
2288 }
2289
Frank Barchard490febe2020-07-16 18:42:17 -07002290 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_small_kernel) {
Frank Barchard569561d2020-06-17 13:11:12 -07002291 TEST_REQUIRES_ARM_NEON;
2292 for (uint32_t n = 9; n < 16; n++) {
2293 for (size_t k = 1; k <= 10; k += 3) {
2294 GemmMicrokernelTester()
2295 .mr(4)
2296 .nr(8)
2297 .kr(1)
2298 .sr(1)
2299 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002300 .n(n)
Frank Barchard569561d2020-06-17 13:11:12 -07002301 .k(k)
2302 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002303 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002304 }
2305 }
2306 }
2307
Frank Barchard490febe2020-07-16 18:42:17 -07002308 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_small_kernel) {
Frank Barchard569561d2020-06-17 13:11:12 -07002309 TEST_REQUIRES_ARM_NEON;
2310 for (uint32_t n = 16; n <= 24; n += 8) {
2311 for (size_t k = 1; k <= 10; k += 3) {
2312 GemmMicrokernelTester()
2313 .mr(4)
2314 .nr(8)
2315 .kr(1)
2316 .sr(1)
2317 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002318 .n(n)
Frank Barchard569561d2020-06-17 13:11:12 -07002319 .k(k)
2320 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002321 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002322 }
2323 }
2324 }
2325
Frank Barchard490febe2020-07-16 18:42:17 -07002326 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, strided_cm_subtile) {
Frank Barchard569561d2020-06-17 13:11:12 -07002327 TEST_REQUIRES_ARM_NEON;
2328 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002329 for (uint32_t n = 1; n <= 8; n++) {
2330 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard569561d2020-06-17 13:11:12 -07002331 GemmMicrokernelTester()
2332 .mr(4)
2333 .nr(8)
2334 .kr(1)
2335 .sr(1)
2336 .m(m)
2337 .n(n)
2338 .k(k)
2339 .cm_stride(11)
2340 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002341 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002342 }
2343 }
2344 }
2345 }
2346
Frank Barchard490febe2020-07-16 18:42:17 -07002347 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, a_offset) {
Frank Barchard569561d2020-06-17 13:11:12 -07002348 TEST_REQUIRES_ARM_NEON;
2349 for (size_t k = 1; k <= 10; k += 3) {
2350 GemmMicrokernelTester()
2351 .mr(4)
2352 .nr(8)
2353 .kr(1)
2354 .sr(1)
2355 .m(4)
2356 .n(8)
2357 .k(k)
2358 .ks(3)
2359 .a_offset(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002360 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002361 }
2362 }
2363
Frank Barchard490febe2020-07-16 18:42:17 -07002364 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, zero) {
Frank Barchard569561d2020-06-17 13:11:12 -07002365 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002366 for (size_t k = 1; k <= 10; k += 3) {
2367 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchard569561d2020-06-17 13:11:12 -07002368 GemmMicrokernelTester()
2369 .mr(4)
2370 .nr(8)
2371 .kr(1)
2372 .sr(1)
2373 .m(4)
2374 .n(8)
2375 .k(k)
2376 .ks(3)
2377 .a_offset(43)
2378 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002379 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002380 }
2381 }
2382 }
2383
Frank Barchard490febe2020-07-16 18:42:17 -07002384 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, qmin) {
Frank Barchard569561d2020-06-17 13:11:12 -07002385 TEST_REQUIRES_ARM_NEON;
2386 GemmMicrokernelTester()
2387 .mr(4)
2388 .nr(8)
2389 .kr(1)
2390 .sr(1)
2391 .m(4)
2392 .n(8)
2393 .k(2)
2394 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002395 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002396 }
2397
Frank Barchard490febe2020-07-16 18:42:17 -07002398 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, qmax) {
Frank Barchard569561d2020-06-17 13:11:12 -07002399 TEST_REQUIRES_ARM_NEON;
2400 GemmMicrokernelTester()
2401 .mr(4)
2402 .nr(8)
2403 .kr(1)
2404 .sr(1)
2405 .m(4)
2406 .n(8)
2407 .k(2)
2408 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002409 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002410 }
2411
Frank Barchard490febe2020-07-16 18:42:17 -07002412 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A7, strided_cm) {
Frank Barchard569561d2020-06-17 13:11:12 -07002413 TEST_REQUIRES_ARM_NEON;
2414 GemmMicrokernelTester()
2415 .mr(4)
2416 .nr(8)
2417 .kr(1)
2418 .sr(1)
2419 .m(4)
2420 .n(8)
2421 .k(2)
2422 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002423 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Frank Barchard569561d2020-06-17 13:11:12 -07002424 }
2425#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
2426
2427
2428#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07002429 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002430 TEST_REQUIRES_ARM_NEON;
2431 GemmMicrokernelTester()
2432 .mr(4)
2433 .nr(8)
2434 .kr(1)
2435 .sr(1)
2436 .m(4)
2437 .n(8)
2438 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002439 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002440 }
2441
Marat Dukhande06f492020-04-09 00:19:31 -07002442 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002443 TEST_REQUIRES_ARM_NEON;
2444 GemmMicrokernelTester()
2445 .mr(4)
2446 .nr(8)
2447 .kr(1)
2448 .sr(1)
2449 .m(4)
2450 .n(8)
2451 .k(4)
2452 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002453 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002454 }
2455
Marat Dukhande06f492020-04-09 00:19:31 -07002456 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002457 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002458 for (uint32_t n = 1; n <= 8; n++) {
2459 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002460 GemmMicrokernelTester()
2461 .mr(4)
2462 .nr(8)
2463 .kr(1)
2464 .sr(1)
2465 .m(m)
2466 .n(n)
2467 .k(4)
2468 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002469 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002470 }
2471 }
2472 }
2473
Marat Dukhande06f492020-04-09 00:19:31 -07002474 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002475 TEST_REQUIRES_ARM_NEON;
2476 for (uint32_t m = 1; m <= 4; m++) {
2477 GemmMicrokernelTester()
2478 .mr(4)
2479 .nr(8)
2480 .kr(1)
2481 .sr(1)
2482 .m(m)
2483 .n(8)
2484 .k(4)
2485 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002486 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002487 }
2488 }
2489
Marat Dukhande06f492020-04-09 00:19:31 -07002490 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002491 TEST_REQUIRES_ARM_NEON;
2492 for (uint32_t n = 1; n <= 8; n++) {
2493 GemmMicrokernelTester()
2494 .mr(4)
2495 .nr(8)
2496 .kr(1)
2497 .sr(1)
2498 .m(4)
2499 .n(n)
2500 .k(4)
2501 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002502 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002503 }
2504 }
2505
Marat Dukhande06f492020-04-09 00:19:31 -07002506 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002507 TEST_REQUIRES_ARM_NEON;
2508 GemmMicrokernelTester()
2509 .mr(4)
2510 .nr(8)
2511 .kr(1)
2512 .sr(1)
2513 .m(4)
2514 .n(8)
2515 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002516 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002517 }
2518
Marat Dukhande06f492020-04-09 00:19:31 -07002519 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002520 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002521 for (uint32_t n = 1; n <= 8; n++) {
2522 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002523 GemmMicrokernelTester()
2524 .mr(4)
2525 .nr(8)
2526 .kr(1)
2527 .sr(1)
2528 .m(m)
2529 .n(n)
2530 .k(8)
2531 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002532 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002533 }
2534 }
2535 }
2536
Marat Dukhande06f492020-04-09 00:19:31 -07002537 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002538 TEST_REQUIRES_ARM_NEON;
2539 for (size_t k = 1; k < 8; k++) {
2540 GemmMicrokernelTester()
2541 .mr(4)
2542 .nr(8)
2543 .kr(1)
2544 .sr(1)
2545 .m(4)
2546 .n(8)
2547 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002548 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002549 }
2550 }
2551
Marat Dukhande06f492020-04-09 00:19:31 -07002552 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002553 TEST_REQUIRES_ARM_NEON;
2554 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002555 for (uint32_t n = 1; n <= 8; n++) {
2556 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002557 GemmMicrokernelTester()
2558 .mr(4)
2559 .nr(8)
2560 .kr(1)
2561 .sr(1)
2562 .m(m)
2563 .n(n)
2564 .k(k)
2565 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002566 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002567 }
2568 }
2569 }
2570 }
2571
Marat Dukhande06f492020-04-09 00:19:31 -07002572 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002573 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002574 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002575 GemmMicrokernelTester()
2576 .mr(4)
2577 .nr(8)
2578 .kr(1)
2579 .sr(1)
2580 .m(4)
2581 .n(8)
2582 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002583 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002584 }
2585 }
2586
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002587 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002588 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08002589 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002590 for (uint32_t n = 1; n <= 8; n++) {
2591 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002592 GemmMicrokernelTester()
2593 .mr(4)
2594 .nr(8)
2595 .kr(1)
2596 .sr(1)
2597 .m(m)
2598 .n(n)
2599 .k(k)
2600 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002601 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002602 }
2603 }
2604 }
2605 }
2606
Marat Dukhande06f492020-04-09 00:19:31 -07002607 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002608 TEST_REQUIRES_ARM_NEON;
2609 for (size_t k = 12; k <= 40; k += 4) {
2610 GemmMicrokernelTester()
2611 .mr(4)
2612 .nr(8)
2613 .kr(1)
2614 .sr(1)
2615 .m(4)
2616 .n(8)
2617 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002618 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002619 }
2620 }
2621
Marat Dukhande06f492020-04-09 00:19:31 -07002622 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002623 TEST_REQUIRES_ARM_NEON;
2624 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002625 for (uint32_t n = 1; n <= 8; n++) {
2626 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002627 GemmMicrokernelTester()
2628 .mr(4)
2629 .nr(8)
2630 .kr(1)
2631 .sr(1)
2632 .m(m)
2633 .n(n)
2634 .k(k)
2635 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002636 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002637 }
2638 }
2639 }
2640 }
2641
Marat Dukhande06f492020-04-09 00:19:31 -07002642 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002643 TEST_REQUIRES_ARM_NEON;
2644 for (uint32_t n = 9; n < 16; n++) {
2645 for (size_t k = 1; k <= 20; k += 5) {
2646 GemmMicrokernelTester()
2647 .mr(4)
2648 .nr(8)
2649 .kr(1)
2650 .sr(1)
2651 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002652 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002653 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002654 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002655 }
2656 }
2657 }
2658
Marat Dukhande06f492020-04-09 00:19:31 -07002659 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002660 TEST_REQUIRES_ARM_NEON;
2661 for (uint32_t n = 9; n < 16; n++) {
2662 for (size_t k = 1; k <= 20; k += 5) {
2663 GemmMicrokernelTester()
2664 .mr(4)
2665 .nr(8)
2666 .kr(1)
2667 .sr(1)
2668 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002669 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002670 .k(k)
2671 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002672 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002673 }
2674 }
2675 }
2676
Marat Dukhande06f492020-04-09 00:19:31 -07002677 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002678 TEST_REQUIRES_ARM_NEON;
2679 for (uint32_t n = 9; n < 16; n++) {
2680 for (size_t k = 1; k <= 20; k += 5) {
2681 for (uint32_t m = 1; m <= 4; m++) {
2682 GemmMicrokernelTester()
2683 .mr(4)
2684 .nr(8)
2685 .kr(1)
2686 .sr(1)
2687 .m(m)
2688 .n(n)
2689 .k(k)
2690 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002691 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002692 }
2693 }
2694 }
2695 }
2696
Marat Dukhande06f492020-04-09 00:19:31 -07002697 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002698 TEST_REQUIRES_ARM_NEON;
2699 for (uint32_t n = 16; n <= 24; n += 8) {
2700 for (size_t k = 1; k <= 20; k += 5) {
2701 GemmMicrokernelTester()
2702 .mr(4)
2703 .nr(8)
2704 .kr(1)
2705 .sr(1)
2706 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002707 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002708 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002709 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002710 }
2711 }
2712 }
2713
Marat Dukhande06f492020-04-09 00:19:31 -07002714 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002715 TEST_REQUIRES_ARM_NEON;
2716 for (uint32_t n = 16; n <= 24; n += 8) {
2717 for (size_t k = 1; k <= 20; k += 5) {
2718 GemmMicrokernelTester()
2719 .mr(4)
2720 .nr(8)
2721 .kr(1)
2722 .sr(1)
2723 .m(4)
2724 .n(n)
2725 .k(k)
2726 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002727 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002728 }
2729 }
2730 }
2731
Marat Dukhande06f492020-04-09 00:19:31 -07002732 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002733 TEST_REQUIRES_ARM_NEON;
2734 for (uint32_t n = 16; n <= 24; n += 8) {
2735 for (size_t k = 1; k <= 20; k += 5) {
2736 for (uint32_t m = 1; m <= 4; m++) {
2737 GemmMicrokernelTester()
2738 .mr(4)
2739 .nr(8)
2740 .kr(1)
2741 .sr(1)
2742 .m(m)
2743 .n(n)
2744 .k(k)
2745 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002746 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002747 }
2748 }
2749 }
2750 }
2751
Marat Dukhande06f492020-04-09 00:19:31 -07002752 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002753 TEST_REQUIRES_ARM_NEON;
2754 for (size_t k = 1; k <= 20; k += 5) {
2755 GemmMicrokernelTester()
2756 .mr(4)
2757 .nr(8)
2758 .kr(1)
2759 .sr(1)
2760 .m(4)
2761 .n(8)
2762 .k(k)
2763 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002764 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002765 }
2766 }
2767
Marat Dukhande06f492020-04-09 00:19:31 -07002768 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002769 TEST_REQUIRES_ARM_NEON;
2770 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002771 for (uint32_t n = 1; n <= 8; n++) {
2772 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002773 GemmMicrokernelTester()
2774 .mr(4)
2775 .nr(8)
2776 .kr(1)
2777 .sr(1)
2778 .m(m)
2779 .n(n)
2780 .k(k)
2781 .ks(3)
2782 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002783 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002784 }
2785 }
2786 }
2787 }
2788
Marat Dukhande06f492020-04-09 00:19:31 -07002789 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002790 TEST_REQUIRES_ARM_NEON;
2791 for (uint32_t n = 9; n < 16; n++) {
2792 for (size_t k = 1; k <= 20; k += 5) {
2793 GemmMicrokernelTester()
2794 .mr(4)
2795 .nr(8)
2796 .kr(1)
2797 .sr(1)
2798 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002799 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002800 .k(k)
2801 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002802 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002803 }
2804 }
2805 }
2806
Marat Dukhande06f492020-04-09 00:19:31 -07002807 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002808 TEST_REQUIRES_ARM_NEON;
2809 for (uint32_t n = 16; n <= 24; n += 8) {
2810 for (size_t k = 1; k <= 20; k += 5) {
2811 GemmMicrokernelTester()
2812 .mr(4)
2813 .nr(8)
2814 .kr(1)
2815 .sr(1)
2816 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002817 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07002818 .k(k)
2819 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002820 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002821 }
2822 }
2823 }
2824
Marat Dukhande06f492020-04-09 00:19:31 -07002825 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002826 TEST_REQUIRES_ARM_NEON;
2827 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002828 for (uint32_t n = 1; n <= 8; n++) {
2829 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002830 GemmMicrokernelTester()
2831 .mr(4)
2832 .nr(8)
2833 .kr(1)
2834 .sr(1)
2835 .m(m)
2836 .n(n)
2837 .k(k)
2838 .cm_stride(11)
2839 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002840 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002841 }
2842 }
2843 }
2844 }
2845
Marat Dukhande06f492020-04-09 00:19:31 -07002846 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002847 TEST_REQUIRES_ARM_NEON;
2848 for (size_t k = 1; k <= 20; k += 5) {
2849 GemmMicrokernelTester()
2850 .mr(4)
2851 .nr(8)
2852 .kr(1)
2853 .sr(1)
2854 .m(4)
2855 .n(8)
2856 .k(k)
2857 .ks(3)
2858 .a_offset(83)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002859 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002860 }
2861 }
2862
Marat Dukhande06f492020-04-09 00:19:31 -07002863 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002864 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002865 for (size_t k = 1; k <= 20; k += 5) {
2866 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002867 GemmMicrokernelTester()
2868 .mr(4)
2869 .nr(8)
2870 .kr(1)
2871 .sr(1)
2872 .m(4)
2873 .n(8)
2874 .k(k)
2875 .ks(3)
2876 .a_offset(83)
2877 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002878 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002879 }
2880 }
2881 }
2882
Marat Dukhande06f492020-04-09 00:19:31 -07002883 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002884 TEST_REQUIRES_ARM_NEON;
2885 GemmMicrokernelTester()
2886 .mr(4)
2887 .nr(8)
2888 .kr(1)
2889 .sr(1)
2890 .m(4)
2891 .n(8)
2892 .k(4)
2893 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002894 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002895 }
2896
Marat Dukhande06f492020-04-09 00:19:31 -07002897 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002898 TEST_REQUIRES_ARM_NEON;
2899 GemmMicrokernelTester()
2900 .mr(4)
2901 .nr(8)
2902 .kr(1)
2903 .sr(1)
2904 .m(4)
2905 .n(8)
2906 .k(4)
2907 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002908 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002909 }
2910
Marat Dukhande06f492020-04-09 00:19:31 -07002911 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A75, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002912 TEST_REQUIRES_ARM_NEON;
2913 GemmMicrokernelTester()
2914 .mr(4)
2915 .nr(8)
2916 .kr(1)
2917 .sr(1)
2918 .m(4)
2919 .n(8)
2920 .k(4)
2921 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07002922 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002923 }
Marat Dukhanf6068062020-05-17 04:42:19 -07002924#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Marat Dukhan1c587112020-04-08 20:04:28 -07002925
2926
Marat Dukhand18cec32020-05-18 01:29:29 -07002927#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Frank Barchard78735862022-01-04 16:47:44 -08002928 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002929 TEST_REQUIRES_ARM_NEON;
2930 GemmMicrokernelTester()
2931 .mr(4)
2932 .nr(8)
2933 .kr(1)
2934 .sr(1)
2935 .m(4)
2936 .n(8)
2937 .k(4)
Frank Barchard78735862022-01-04 16:47:44 -08002938 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002939 }
2940
Frank Barchard78735862022-01-04 16:47:44 -08002941 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002942 TEST_REQUIRES_ARM_NEON;
2943 GemmMicrokernelTester()
2944 .mr(4)
2945 .nr(8)
2946 .kr(1)
2947 .sr(1)
2948 .m(4)
2949 .n(8)
2950 .k(4)
2951 .cn_stride(11)
Frank Barchard78735862022-01-04 16:47:44 -08002952 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002953 }
2954
Frank Barchard78735862022-01-04 16:47:44 -08002955 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002956 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002957 for (uint32_t n = 1; n <= 8; n++) {
2958 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002959 GemmMicrokernelTester()
2960 .mr(4)
2961 .nr(8)
2962 .kr(1)
2963 .sr(1)
2964 .m(m)
2965 .n(n)
2966 .k(4)
2967 .iterations(1)
Frank Barchard78735862022-01-04 16:47:44 -08002968 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002969 }
2970 }
2971 }
2972
Frank Barchard78735862022-01-04 16:47:44 -08002973 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002974 TEST_REQUIRES_ARM_NEON;
2975 for (uint32_t m = 1; m <= 4; m++) {
2976 GemmMicrokernelTester()
2977 .mr(4)
2978 .nr(8)
2979 .kr(1)
2980 .sr(1)
2981 .m(m)
2982 .n(8)
2983 .k(4)
2984 .iterations(1)
Frank Barchard78735862022-01-04 16:47:44 -08002985 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07002986 }
2987 }
2988
Frank Barchard78735862022-01-04 16:47:44 -08002989 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07002990 TEST_REQUIRES_ARM_NEON;
2991 for (uint32_t n = 1; n <= 8; n++) {
2992 GemmMicrokernelTester()
2993 .mr(4)
2994 .nr(8)
2995 .kr(1)
2996 .sr(1)
2997 .m(4)
2998 .n(n)
2999 .k(4)
3000 .iterations(1)
Frank Barchard78735862022-01-04 16:47:44 -08003001 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003002 }
3003 }
3004
Frank Barchard78735862022-01-04 16:47:44 -08003005 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003006 TEST_REQUIRES_ARM_NEON;
3007 GemmMicrokernelTester()
3008 .mr(4)
3009 .nr(8)
3010 .kr(1)
3011 .sr(1)
3012 .m(4)
3013 .n(8)
3014 .k(8)
Frank Barchard78735862022-01-04 16:47:44 -08003015 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003016 }
3017
Frank Barchard78735862022-01-04 16:47:44 -08003018 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003019 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003020 for (uint32_t n = 1; n <= 8; n++) {
3021 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003022 GemmMicrokernelTester()
3023 .mr(4)
3024 .nr(8)
3025 .kr(1)
3026 .sr(1)
3027 .m(m)
3028 .n(n)
3029 .k(8)
3030 .iterations(1)
Frank Barchard78735862022-01-04 16:47:44 -08003031 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003032 }
3033 }
3034 }
3035
Frank Barchard78735862022-01-04 16:47:44 -08003036 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003037 TEST_REQUIRES_ARM_NEON;
3038 for (size_t k = 1; k < 8; k++) {
3039 GemmMicrokernelTester()
3040 .mr(4)
3041 .nr(8)
3042 .kr(1)
3043 .sr(1)
3044 .m(4)
3045 .n(8)
3046 .k(k)
Frank Barchard78735862022-01-04 16:47:44 -08003047 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003048 }
3049 }
3050
Frank Barchard78735862022-01-04 16:47:44 -08003051 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003052 TEST_REQUIRES_ARM_NEON;
3053 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003054 for (uint32_t n = 1; n <= 8; n++) {
3055 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003056 GemmMicrokernelTester()
3057 .mr(4)
3058 .nr(8)
3059 .kr(1)
3060 .sr(1)
3061 .m(m)
3062 .n(n)
3063 .k(k)
3064 .iterations(1)
Frank Barchard78735862022-01-04 16:47:44 -08003065 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003066 }
3067 }
3068 }
3069 }
3070
Frank Barchard78735862022-01-04 16:47:44 -08003071 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003072 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003073 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003074 GemmMicrokernelTester()
3075 .mr(4)
3076 .nr(8)
3077 .kr(1)
3078 .sr(1)
3079 .m(4)
3080 .n(8)
3081 .k(k)
Frank Barchard78735862022-01-04 16:47:44 -08003082 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003083 }
3084 }
3085
Frank Barchard78735862022-01-04 16:47:44 -08003086 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003087 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003088 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003089 for (uint32_t n = 1; n <= 8; n++) {
3090 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003091 GemmMicrokernelTester()
3092 .mr(4)
3093 .nr(8)
3094 .kr(1)
3095 .sr(1)
3096 .m(m)
3097 .n(n)
3098 .k(k)
3099 .iterations(1)
Frank Barchard78735862022-01-04 16:47:44 -08003100 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003101 }
3102 }
3103 }
3104 }
3105
Frank Barchard78735862022-01-04 16:47:44 -08003106 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003107 TEST_REQUIRES_ARM_NEON;
3108 for (size_t k = 12; k <= 40; k += 4) {
3109 GemmMicrokernelTester()
3110 .mr(4)
3111 .nr(8)
3112 .kr(1)
3113 .sr(1)
3114 .m(4)
3115 .n(8)
3116 .k(k)
Frank Barchard78735862022-01-04 16:47:44 -08003117 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003118 }
3119 }
3120
Frank Barchard78735862022-01-04 16:47:44 -08003121 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003122 TEST_REQUIRES_ARM_NEON;
3123 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003124 for (uint32_t n = 1; n <= 8; n++) {
3125 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003126 GemmMicrokernelTester()
3127 .mr(4)
3128 .nr(8)
3129 .kr(1)
3130 .sr(1)
3131 .m(m)
3132 .n(n)
3133 .k(k)
3134 .iterations(1)
Frank Barchard78735862022-01-04 16:47:44 -08003135 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003136 }
3137 }
3138 }
3139 }
3140
Frank Barchard78735862022-01-04 16:47:44 -08003141 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003142 TEST_REQUIRES_ARM_NEON;
3143 for (uint32_t n = 9; n < 16; n++) {
3144 for (size_t k = 1; k <= 20; k += 5) {
3145 GemmMicrokernelTester()
3146 .mr(4)
3147 .nr(8)
3148 .kr(1)
3149 .sr(1)
3150 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003151 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003152 .k(k)
Frank Barchard78735862022-01-04 16:47:44 -08003153 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003154 }
3155 }
3156 }
3157
Frank Barchard78735862022-01-04 16:47:44 -08003158 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003159 TEST_REQUIRES_ARM_NEON;
3160 for (uint32_t n = 9; n < 16; n++) {
3161 for (size_t k = 1; k <= 20; k += 5) {
3162 GemmMicrokernelTester()
3163 .mr(4)
3164 .nr(8)
3165 .kr(1)
3166 .sr(1)
3167 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003168 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003169 .k(k)
3170 .cn_stride(11)
Frank Barchard78735862022-01-04 16:47:44 -08003171 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003172 }
3173 }
3174 }
3175
Frank Barchard78735862022-01-04 16:47:44 -08003176 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003177 TEST_REQUIRES_ARM_NEON;
3178 for (uint32_t n = 9; n < 16; n++) {
3179 for (size_t k = 1; k <= 20; k += 5) {
3180 for (uint32_t m = 1; m <= 4; m++) {
3181 GemmMicrokernelTester()
3182 .mr(4)
3183 .nr(8)
3184 .kr(1)
3185 .sr(1)
3186 .m(m)
3187 .n(n)
3188 .k(k)
3189 .iterations(1)
Frank Barchard78735862022-01-04 16:47:44 -08003190 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003191 }
3192 }
3193 }
3194 }
3195
Frank Barchard78735862022-01-04 16:47:44 -08003196 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003197 TEST_REQUIRES_ARM_NEON;
3198 for (uint32_t n = 16; n <= 24; n += 8) {
3199 for (size_t k = 1; k <= 20; k += 5) {
3200 GemmMicrokernelTester()
3201 .mr(4)
3202 .nr(8)
3203 .kr(1)
3204 .sr(1)
3205 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003206 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003207 .k(k)
Frank Barchard78735862022-01-04 16:47:44 -08003208 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003209 }
3210 }
3211 }
3212
Frank Barchard78735862022-01-04 16:47:44 -08003213 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003214 TEST_REQUIRES_ARM_NEON;
3215 for (uint32_t n = 16; n <= 24; n += 8) {
3216 for (size_t k = 1; k <= 20; k += 5) {
3217 GemmMicrokernelTester()
3218 .mr(4)
3219 .nr(8)
3220 .kr(1)
3221 .sr(1)
3222 .m(4)
3223 .n(n)
3224 .k(k)
3225 .cn_stride(11)
Frank Barchard78735862022-01-04 16:47:44 -08003226 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003227 }
3228 }
3229 }
3230
Frank Barchard78735862022-01-04 16:47:44 -08003231 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003232 TEST_REQUIRES_ARM_NEON;
3233 for (uint32_t n = 16; n <= 24; n += 8) {
3234 for (size_t k = 1; k <= 20; k += 5) {
3235 for (uint32_t m = 1; m <= 4; m++) {
3236 GemmMicrokernelTester()
3237 .mr(4)
3238 .nr(8)
3239 .kr(1)
3240 .sr(1)
3241 .m(m)
3242 .n(n)
3243 .k(k)
3244 .iterations(1)
Frank Barchard78735862022-01-04 16:47:44 -08003245 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003246 }
3247 }
3248 }
3249 }
3250
Frank Barchard78735862022-01-04 16:47:44 -08003251 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003252 TEST_REQUIRES_ARM_NEON;
3253 for (size_t k = 1; k <= 20; k += 5) {
3254 GemmMicrokernelTester()
3255 .mr(4)
3256 .nr(8)
3257 .kr(1)
3258 .sr(1)
3259 .m(4)
3260 .n(8)
3261 .k(k)
3262 .ks(3)
Frank Barchard78735862022-01-04 16:47:44 -08003263 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003264 }
3265 }
3266
Frank Barchard78735862022-01-04 16:47:44 -08003267 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003268 TEST_REQUIRES_ARM_NEON;
3269 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003270 for (uint32_t n = 1; n <= 8; n++) {
3271 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003272 GemmMicrokernelTester()
3273 .mr(4)
3274 .nr(8)
3275 .kr(1)
3276 .sr(1)
3277 .m(m)
3278 .n(n)
3279 .k(k)
3280 .ks(3)
3281 .iterations(1)
Frank Barchard78735862022-01-04 16:47:44 -08003282 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003283 }
3284 }
3285 }
3286 }
3287
Frank Barchard78735862022-01-04 16:47:44 -08003288 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003289 TEST_REQUIRES_ARM_NEON;
3290 for (uint32_t n = 9; n < 16; n++) {
3291 for (size_t k = 1; k <= 20; k += 5) {
3292 GemmMicrokernelTester()
3293 .mr(4)
3294 .nr(8)
3295 .kr(1)
3296 .sr(1)
3297 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003298 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003299 .k(k)
3300 .ks(3)
Frank Barchard78735862022-01-04 16:47:44 -08003301 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003302 }
3303 }
3304 }
3305
Frank Barchard78735862022-01-04 16:47:44 -08003306 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003307 TEST_REQUIRES_ARM_NEON;
3308 for (uint32_t n = 16; n <= 24; n += 8) {
3309 for (size_t k = 1; k <= 20; k += 5) {
3310 GemmMicrokernelTester()
3311 .mr(4)
3312 .nr(8)
3313 .kr(1)
3314 .sr(1)
3315 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003316 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003317 .k(k)
3318 .ks(3)
Frank Barchard78735862022-01-04 16:47:44 -08003319 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003320 }
3321 }
3322 }
3323
Frank Barchard78735862022-01-04 16:47:44 -08003324 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003325 TEST_REQUIRES_ARM_NEON;
3326 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003327 for (uint32_t n = 1; n <= 8; n++) {
3328 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003329 GemmMicrokernelTester()
3330 .mr(4)
3331 .nr(8)
3332 .kr(1)
3333 .sr(1)
3334 .m(m)
3335 .n(n)
3336 .k(k)
3337 .cm_stride(11)
3338 .iterations(1)
Frank Barchard78735862022-01-04 16:47:44 -08003339 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003340 }
3341 }
3342 }
3343 }
3344
Frank Barchard78735862022-01-04 16:47:44 -08003345 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003346 TEST_REQUIRES_ARM_NEON;
3347 for (size_t k = 1; k <= 20; k += 5) {
3348 GemmMicrokernelTester()
3349 .mr(4)
3350 .nr(8)
3351 .kr(1)
3352 .sr(1)
3353 .m(4)
3354 .n(8)
3355 .k(k)
3356 .ks(3)
3357 .a_offset(83)
Frank Barchard78735862022-01-04 16:47:44 -08003358 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003359 }
3360 }
3361
Frank Barchard78735862022-01-04 16:47:44 -08003362 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003363 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003364 for (size_t k = 1; k <= 20; k += 5) {
3365 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003366 GemmMicrokernelTester()
3367 .mr(4)
3368 .nr(8)
3369 .kr(1)
3370 .sr(1)
3371 .m(4)
3372 .n(8)
3373 .k(k)
3374 .ks(3)
3375 .a_offset(83)
3376 .zero_index(mz)
Frank Barchard78735862022-01-04 16:47:44 -08003377 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003378 }
3379 }
3380 }
3381
Frank Barchard78735862022-01-04 16:47:44 -08003382 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003383 TEST_REQUIRES_ARM_NEON;
3384 GemmMicrokernelTester()
3385 .mr(4)
3386 .nr(8)
3387 .kr(1)
3388 .sr(1)
3389 .m(4)
3390 .n(8)
3391 .k(4)
3392 .qmin(128)
Frank Barchard78735862022-01-04 16:47:44 -08003393 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003394 }
3395
Frank Barchard78735862022-01-04 16:47:44 -08003396 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003397 TEST_REQUIRES_ARM_NEON;
3398 GemmMicrokernelTester()
3399 .mr(4)
3400 .nr(8)
3401 .kr(1)
3402 .sr(1)
3403 .m(4)
3404 .n(8)
3405 .k(4)
3406 .qmax(128)
Frank Barchard78735862022-01-04 16:47:44 -08003407 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003408 }
3409
Frank Barchard78735862022-01-04 16:47:44 -08003410 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003411 TEST_REQUIRES_ARM_NEON;
3412 GemmMicrokernelTester()
3413 .mr(4)
3414 .nr(8)
3415 .kr(1)
3416 .sr(1)
3417 .m(4)
3418 .n(8)
3419 .k(4)
3420 .cm_stride(11)
Frank Barchard78735862022-01-04 16:47:44 -08003421 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003422 }
Marat Dukhand18cec32020-05-18 01:29:29 -07003423#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Marat Dukhan1c587112020-04-08 20:04:28 -07003424
3425
Marat Dukhand18cec32020-05-18 01:29:29 -07003426#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07003427 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003428 TEST_REQUIRES_ARM_NEON;
3429 GemmMicrokernelTester()
3430 .mr(4)
3431 .nr(8)
3432 .kr(1)
3433 .sr(1)
3434 .m(4)
3435 .n(8)
3436 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003437 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003438 }
3439
Marat Dukhande06f492020-04-09 00:19:31 -07003440 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003441 TEST_REQUIRES_ARM_NEON;
3442 GemmMicrokernelTester()
3443 .mr(4)
3444 .nr(8)
3445 .kr(1)
3446 .sr(1)
3447 .m(4)
3448 .n(8)
3449 .k(4)
3450 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003451 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003452 }
3453
Marat Dukhande06f492020-04-09 00:19:31 -07003454 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003455 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003456 for (uint32_t n = 1; n <= 8; n++) {
3457 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003458 GemmMicrokernelTester()
3459 .mr(4)
3460 .nr(8)
3461 .kr(1)
3462 .sr(1)
3463 .m(m)
3464 .n(n)
3465 .k(4)
3466 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003467 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003468 }
3469 }
3470 }
3471
Marat Dukhande06f492020-04-09 00:19:31 -07003472 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003473 TEST_REQUIRES_ARM_NEON;
3474 for (uint32_t m = 1; m <= 4; m++) {
3475 GemmMicrokernelTester()
3476 .mr(4)
3477 .nr(8)
3478 .kr(1)
3479 .sr(1)
3480 .m(m)
3481 .n(8)
3482 .k(4)
3483 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003484 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003485 }
3486 }
3487
Marat Dukhande06f492020-04-09 00:19:31 -07003488 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003489 TEST_REQUIRES_ARM_NEON;
3490 for (uint32_t n = 1; n <= 8; n++) {
3491 GemmMicrokernelTester()
3492 .mr(4)
3493 .nr(8)
3494 .kr(1)
3495 .sr(1)
3496 .m(4)
3497 .n(n)
3498 .k(4)
3499 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003500 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003501 }
3502 }
3503
Marat Dukhande06f492020-04-09 00:19:31 -07003504 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003505 TEST_REQUIRES_ARM_NEON;
3506 GemmMicrokernelTester()
3507 .mr(4)
3508 .nr(8)
3509 .kr(1)
3510 .sr(1)
3511 .m(4)
3512 .n(8)
3513 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003514 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003515 }
3516
Marat Dukhande06f492020-04-09 00:19:31 -07003517 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003518 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003519 for (uint32_t n = 1; n <= 8; n++) {
3520 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003521 GemmMicrokernelTester()
3522 .mr(4)
3523 .nr(8)
3524 .kr(1)
3525 .sr(1)
3526 .m(m)
3527 .n(n)
3528 .k(8)
3529 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003530 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003531 }
3532 }
3533 }
3534
Marat Dukhande06f492020-04-09 00:19:31 -07003535 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003536 TEST_REQUIRES_ARM_NEON;
3537 for (size_t k = 1; k < 8; k++) {
3538 GemmMicrokernelTester()
3539 .mr(4)
3540 .nr(8)
3541 .kr(1)
3542 .sr(1)
3543 .m(4)
3544 .n(8)
3545 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003546 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003547 }
3548 }
3549
Marat Dukhande06f492020-04-09 00:19:31 -07003550 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003551 TEST_REQUIRES_ARM_NEON;
3552 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003553 for (uint32_t n = 1; n <= 8; n++) {
3554 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003555 GemmMicrokernelTester()
3556 .mr(4)
3557 .nr(8)
3558 .kr(1)
3559 .sr(1)
3560 .m(m)
3561 .n(n)
3562 .k(k)
3563 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003564 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003565 }
3566 }
3567 }
3568 }
3569
Marat Dukhande06f492020-04-09 00:19:31 -07003570 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003571 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003572 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003573 GemmMicrokernelTester()
3574 .mr(4)
3575 .nr(8)
3576 .kr(1)
3577 .sr(1)
3578 .m(4)
3579 .n(8)
3580 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003581 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003582 }
3583 }
3584
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003585 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003586 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08003587 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003588 for (uint32_t n = 1; n <= 8; n++) {
3589 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003590 GemmMicrokernelTester()
3591 .mr(4)
3592 .nr(8)
3593 .kr(1)
3594 .sr(1)
3595 .m(m)
3596 .n(n)
3597 .k(k)
3598 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003599 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003600 }
3601 }
3602 }
3603 }
3604
Marat Dukhande06f492020-04-09 00:19:31 -07003605 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003606 TEST_REQUIRES_ARM_NEON;
3607 for (size_t k = 12; k <= 40; k += 4) {
3608 GemmMicrokernelTester()
3609 .mr(4)
3610 .nr(8)
3611 .kr(1)
3612 .sr(1)
3613 .m(4)
3614 .n(8)
3615 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003616 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003617 }
3618 }
3619
Marat Dukhande06f492020-04-09 00:19:31 -07003620 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003621 TEST_REQUIRES_ARM_NEON;
3622 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003623 for (uint32_t n = 1; n <= 8; n++) {
3624 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003625 GemmMicrokernelTester()
3626 .mr(4)
3627 .nr(8)
3628 .kr(1)
3629 .sr(1)
3630 .m(m)
3631 .n(n)
3632 .k(k)
3633 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003634 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003635 }
3636 }
3637 }
3638 }
3639
Marat Dukhande06f492020-04-09 00:19:31 -07003640 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003641 TEST_REQUIRES_ARM_NEON;
3642 for (uint32_t n = 9; n < 16; n++) {
3643 for (size_t k = 1; k <= 20; k += 5) {
3644 GemmMicrokernelTester()
3645 .mr(4)
3646 .nr(8)
3647 .kr(1)
3648 .sr(1)
3649 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003650 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003651 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003652 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003653 }
3654 }
3655 }
3656
Marat Dukhande06f492020-04-09 00:19:31 -07003657 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003658 TEST_REQUIRES_ARM_NEON;
3659 for (uint32_t n = 9; n < 16; n++) {
3660 for (size_t k = 1; k <= 20; k += 5) {
3661 GemmMicrokernelTester()
3662 .mr(4)
3663 .nr(8)
3664 .kr(1)
3665 .sr(1)
3666 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003667 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003668 .k(k)
3669 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003670 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003671 }
3672 }
3673 }
3674
Marat Dukhande06f492020-04-09 00:19:31 -07003675 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003676 TEST_REQUIRES_ARM_NEON;
3677 for (uint32_t n = 9; n < 16; n++) {
3678 for (size_t k = 1; k <= 20; k += 5) {
3679 for (uint32_t m = 1; m <= 4; m++) {
3680 GemmMicrokernelTester()
3681 .mr(4)
3682 .nr(8)
3683 .kr(1)
3684 .sr(1)
3685 .m(m)
3686 .n(n)
3687 .k(k)
3688 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003689 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003690 }
3691 }
3692 }
3693 }
3694
Marat Dukhande06f492020-04-09 00:19:31 -07003695 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003696 TEST_REQUIRES_ARM_NEON;
3697 for (uint32_t n = 16; n <= 24; n += 8) {
3698 for (size_t k = 1; k <= 20; k += 5) {
3699 GemmMicrokernelTester()
3700 .mr(4)
3701 .nr(8)
3702 .kr(1)
3703 .sr(1)
3704 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003705 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003706 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003707 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003708 }
3709 }
3710 }
3711
Marat Dukhande06f492020-04-09 00:19:31 -07003712 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003713 TEST_REQUIRES_ARM_NEON;
3714 for (uint32_t n = 16; n <= 24; n += 8) {
3715 for (size_t k = 1; k <= 20; k += 5) {
3716 GemmMicrokernelTester()
3717 .mr(4)
3718 .nr(8)
3719 .kr(1)
3720 .sr(1)
3721 .m(4)
3722 .n(n)
3723 .k(k)
3724 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003725 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003726 }
3727 }
3728 }
3729
Marat Dukhande06f492020-04-09 00:19:31 -07003730 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003731 TEST_REQUIRES_ARM_NEON;
3732 for (uint32_t n = 16; n <= 24; n += 8) {
3733 for (size_t k = 1; k <= 20; k += 5) {
3734 for (uint32_t m = 1; m <= 4; m++) {
3735 GemmMicrokernelTester()
3736 .mr(4)
3737 .nr(8)
3738 .kr(1)
3739 .sr(1)
3740 .m(m)
3741 .n(n)
3742 .k(k)
3743 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003744 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003745 }
3746 }
3747 }
3748 }
3749
Marat Dukhande06f492020-04-09 00:19:31 -07003750 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003751 TEST_REQUIRES_ARM_NEON;
3752 for (size_t k = 1; k <= 20; k += 5) {
3753 GemmMicrokernelTester()
3754 .mr(4)
3755 .nr(8)
3756 .kr(1)
3757 .sr(1)
3758 .m(4)
3759 .n(8)
3760 .k(k)
3761 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003762 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003763 }
3764 }
3765
Marat Dukhande06f492020-04-09 00:19:31 -07003766 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003767 TEST_REQUIRES_ARM_NEON;
3768 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003769 for (uint32_t n = 1; n <= 8; n++) {
3770 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003771 GemmMicrokernelTester()
3772 .mr(4)
3773 .nr(8)
3774 .kr(1)
3775 .sr(1)
3776 .m(m)
3777 .n(n)
3778 .k(k)
3779 .ks(3)
3780 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003781 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003782 }
3783 }
3784 }
3785 }
3786
Marat Dukhande06f492020-04-09 00:19:31 -07003787 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003788 TEST_REQUIRES_ARM_NEON;
3789 for (uint32_t n = 9; n < 16; n++) {
3790 for (size_t k = 1; k <= 20; k += 5) {
3791 GemmMicrokernelTester()
3792 .mr(4)
3793 .nr(8)
3794 .kr(1)
3795 .sr(1)
3796 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003797 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003798 .k(k)
3799 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003800 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003801 }
3802 }
3803 }
3804
Marat Dukhande06f492020-04-09 00:19:31 -07003805 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003806 TEST_REQUIRES_ARM_NEON;
3807 for (uint32_t n = 16; n <= 24; n += 8) {
3808 for (size_t k = 1; k <= 20; k += 5) {
3809 GemmMicrokernelTester()
3810 .mr(4)
3811 .nr(8)
3812 .kr(1)
3813 .sr(1)
3814 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003815 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07003816 .k(k)
3817 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003818 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003819 }
3820 }
3821 }
3822
Marat Dukhande06f492020-04-09 00:19:31 -07003823 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003824 TEST_REQUIRES_ARM_NEON;
3825 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003826 for (uint32_t n = 1; n <= 8; n++) {
3827 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003828 GemmMicrokernelTester()
3829 .mr(4)
3830 .nr(8)
3831 .kr(1)
3832 .sr(1)
3833 .m(m)
3834 .n(n)
3835 .k(k)
3836 .cm_stride(11)
3837 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003838 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003839 }
3840 }
3841 }
3842 }
3843
Marat Dukhande06f492020-04-09 00:19:31 -07003844 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003845 TEST_REQUIRES_ARM_NEON;
3846 for (size_t k = 1; k <= 20; k += 5) {
3847 GemmMicrokernelTester()
3848 .mr(4)
3849 .nr(8)
3850 .kr(1)
3851 .sr(1)
3852 .m(4)
3853 .n(8)
3854 .k(k)
3855 .ks(3)
3856 .a_offset(83)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003857 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003858 }
3859 }
3860
Marat Dukhande06f492020-04-09 00:19:31 -07003861 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003862 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003863 for (size_t k = 1; k <= 20; k += 5) {
3864 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003865 GemmMicrokernelTester()
3866 .mr(4)
3867 .nr(8)
3868 .kr(1)
3869 .sr(1)
3870 .m(4)
3871 .n(8)
3872 .k(k)
3873 .ks(3)
3874 .a_offset(83)
3875 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003876 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003877 }
3878 }
3879 }
3880
Marat Dukhande06f492020-04-09 00:19:31 -07003881 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003882 TEST_REQUIRES_ARM_NEON;
3883 GemmMicrokernelTester()
3884 .mr(4)
3885 .nr(8)
3886 .kr(1)
3887 .sr(1)
3888 .m(4)
3889 .n(8)
3890 .k(4)
3891 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003892 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003893 }
3894
Marat Dukhande06f492020-04-09 00:19:31 -07003895 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003896 TEST_REQUIRES_ARM_NEON;
3897 GemmMicrokernelTester()
3898 .mr(4)
3899 .nr(8)
3900 .kr(1)
3901 .sr(1)
3902 .m(4)
3903 .n(8)
3904 .k(4)
3905 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003906 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003907 }
3908
Marat Dukhande06f492020-04-09 00:19:31 -07003909 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A55, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003910 TEST_REQUIRES_ARM_NEON;
3911 GemmMicrokernelTester()
3912 .mr(4)
3913 .nr(8)
3914 .kr(1)
3915 .sr(1)
3916 .m(4)
3917 .n(8)
3918 .k(4)
3919 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003920 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003921 }
Marat Dukhand18cec32020-05-18 01:29:29 -07003922#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Marat Dukhan1c587112020-04-08 20:04:28 -07003923
3924
3925#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07003926 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003927 TEST_REQUIRES_ARM_NEON_FMA;
3928 GemmMicrokernelTester()
3929 .mr(5)
3930 .nr(8)
3931 .kr(1)
3932 .sr(1)
3933 .m(5)
3934 .n(8)
3935 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003936 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003937 }
3938
Marat Dukhande06f492020-04-09 00:19:31 -07003939 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003940 TEST_REQUIRES_ARM_NEON_FMA;
3941 GemmMicrokernelTester()
3942 .mr(5)
3943 .nr(8)
3944 .kr(1)
3945 .sr(1)
3946 .m(5)
3947 .n(8)
3948 .k(8)
3949 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003950 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003951 }
3952
Marat Dukhande06f492020-04-09 00:19:31 -07003953 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003954 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003955 for (uint32_t n = 1; n <= 8; n++) {
3956 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003957 GemmMicrokernelTester()
3958 .mr(5)
3959 .nr(8)
3960 .kr(1)
3961 .sr(1)
3962 .m(m)
3963 .n(n)
3964 .k(8)
3965 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003966 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003967 }
3968 }
3969 }
3970
Marat Dukhande06f492020-04-09 00:19:31 -07003971 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003972 TEST_REQUIRES_ARM_NEON_FMA;
3973 for (uint32_t m = 1; m <= 5; m++) {
3974 GemmMicrokernelTester()
3975 .mr(5)
3976 .nr(8)
3977 .kr(1)
3978 .sr(1)
3979 .m(m)
3980 .n(8)
3981 .k(8)
3982 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003983 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07003984 }
3985 }
3986
Marat Dukhande06f492020-04-09 00:19:31 -07003987 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07003988 TEST_REQUIRES_ARM_NEON_FMA;
3989 for (uint32_t n = 1; n <= 8; n++) {
3990 GemmMicrokernelTester()
3991 .mr(5)
3992 .nr(8)
3993 .kr(1)
3994 .sr(1)
3995 .m(5)
3996 .n(n)
3997 .k(8)
3998 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07003999 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004000 }
4001 }
4002
Marat Dukhande06f492020-04-09 00:19:31 -07004003 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004004 TEST_REQUIRES_ARM_NEON_FMA;
4005 GemmMicrokernelTester()
4006 .mr(5)
4007 .nr(8)
4008 .kr(1)
4009 .sr(1)
4010 .m(5)
4011 .n(8)
4012 .k(16)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004013 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004014 }
4015
Marat Dukhande06f492020-04-09 00:19:31 -07004016 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004017 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004018 for (uint32_t n = 1; n <= 8; n++) {
4019 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004020 GemmMicrokernelTester()
4021 .mr(5)
4022 .nr(8)
4023 .kr(1)
4024 .sr(1)
4025 .m(m)
4026 .n(n)
4027 .k(16)
4028 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004029 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004030 }
4031 }
4032 }
4033
Marat Dukhande06f492020-04-09 00:19:31 -07004034 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004035 TEST_REQUIRES_ARM_NEON_FMA;
4036 for (size_t k = 1; k < 16; k++) {
4037 GemmMicrokernelTester()
4038 .mr(5)
4039 .nr(8)
4040 .kr(1)
4041 .sr(1)
4042 .m(5)
4043 .n(8)
4044 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004045 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004046 }
4047 }
4048
Marat Dukhande06f492020-04-09 00:19:31 -07004049 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004050 TEST_REQUIRES_ARM_NEON_FMA;
4051 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004052 for (uint32_t n = 1; n <= 8; n++) {
4053 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004054 GemmMicrokernelTester()
4055 .mr(5)
4056 .nr(8)
4057 .kr(1)
4058 .sr(1)
4059 .m(m)
4060 .n(n)
4061 .k(k)
4062 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004063 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004064 }
4065 }
4066 }
4067 }
4068
Marat Dukhande06f492020-04-09 00:19:31 -07004069 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004070 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004071 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004072 GemmMicrokernelTester()
4073 .mr(5)
4074 .nr(8)
4075 .kr(1)
4076 .sr(1)
4077 .m(5)
4078 .n(8)
4079 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004080 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004081 }
4082 }
4083
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004084 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004085 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004086 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004087 for (uint32_t n = 1; n <= 8; n++) {
4088 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004089 GemmMicrokernelTester()
4090 .mr(5)
4091 .nr(8)
4092 .kr(1)
4093 .sr(1)
4094 .m(m)
4095 .n(n)
4096 .k(k)
4097 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004098 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004099 }
4100 }
4101 }
4102 }
4103
Marat Dukhande06f492020-04-09 00:19:31 -07004104 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004105 TEST_REQUIRES_ARM_NEON_FMA;
4106 for (size_t k = 24; k <= 80; k += 8) {
4107 GemmMicrokernelTester()
4108 .mr(5)
4109 .nr(8)
4110 .kr(1)
4111 .sr(1)
4112 .m(5)
4113 .n(8)
4114 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004115 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004116 }
4117 }
4118
Marat Dukhande06f492020-04-09 00:19:31 -07004119 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004120 TEST_REQUIRES_ARM_NEON_FMA;
4121 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004122 for (uint32_t n = 1; n <= 8; n++) {
4123 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004124 GemmMicrokernelTester()
4125 .mr(5)
4126 .nr(8)
4127 .kr(1)
4128 .sr(1)
4129 .m(m)
4130 .n(n)
4131 .k(k)
4132 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004133 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004134 }
4135 }
4136 }
4137 }
4138
Marat Dukhande06f492020-04-09 00:19:31 -07004139 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004140 TEST_REQUIRES_ARM_NEON_FMA;
4141 for (uint32_t n = 9; n < 16; n++) {
4142 for (size_t k = 1; k <= 40; k += 9) {
4143 GemmMicrokernelTester()
4144 .mr(5)
4145 .nr(8)
4146 .kr(1)
4147 .sr(1)
4148 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004149 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004150 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004151 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004152 }
4153 }
4154 }
4155
Marat Dukhande06f492020-04-09 00:19:31 -07004156 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004157 TEST_REQUIRES_ARM_NEON_FMA;
4158 for (uint32_t n = 9; n < 16; n++) {
4159 for (size_t k = 1; k <= 40; k += 9) {
4160 GemmMicrokernelTester()
4161 .mr(5)
4162 .nr(8)
4163 .kr(1)
4164 .sr(1)
4165 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004166 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004167 .k(k)
4168 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004169 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004170 }
4171 }
4172 }
4173
Marat Dukhande06f492020-04-09 00:19:31 -07004174 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004175 TEST_REQUIRES_ARM_NEON_FMA;
4176 for (uint32_t n = 9; n < 16; n++) {
4177 for (size_t k = 1; k <= 40; k += 9) {
4178 for (uint32_t m = 1; m <= 5; m++) {
4179 GemmMicrokernelTester()
4180 .mr(5)
4181 .nr(8)
4182 .kr(1)
4183 .sr(1)
4184 .m(m)
4185 .n(n)
4186 .k(k)
4187 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004188 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004189 }
4190 }
4191 }
4192 }
4193
Marat Dukhande06f492020-04-09 00:19:31 -07004194 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004195 TEST_REQUIRES_ARM_NEON_FMA;
4196 for (uint32_t n = 16; n <= 24; n += 8) {
4197 for (size_t k = 1; k <= 40; k += 9) {
4198 GemmMicrokernelTester()
4199 .mr(5)
4200 .nr(8)
4201 .kr(1)
4202 .sr(1)
4203 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004204 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004205 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004206 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004207 }
4208 }
4209 }
4210
Marat Dukhande06f492020-04-09 00:19:31 -07004211 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004212 TEST_REQUIRES_ARM_NEON_FMA;
4213 for (uint32_t n = 16; n <= 24; n += 8) {
4214 for (size_t k = 1; k <= 40; k += 9) {
4215 GemmMicrokernelTester()
4216 .mr(5)
4217 .nr(8)
4218 .kr(1)
4219 .sr(1)
4220 .m(5)
4221 .n(n)
4222 .k(k)
4223 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004224 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004225 }
4226 }
4227 }
4228
Marat Dukhande06f492020-04-09 00:19:31 -07004229 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004230 TEST_REQUIRES_ARM_NEON_FMA;
4231 for (uint32_t n = 16; n <= 24; n += 8) {
4232 for (size_t k = 1; k <= 40; k += 9) {
4233 for (uint32_t m = 1; m <= 5; m++) {
4234 GemmMicrokernelTester()
4235 .mr(5)
4236 .nr(8)
4237 .kr(1)
4238 .sr(1)
4239 .m(m)
4240 .n(n)
4241 .k(k)
4242 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004243 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004244 }
4245 }
4246 }
4247 }
4248
Marat Dukhande06f492020-04-09 00:19:31 -07004249 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004250 TEST_REQUIRES_ARM_NEON_FMA;
4251 for (size_t k = 1; k <= 40; k += 9) {
4252 GemmMicrokernelTester()
4253 .mr(5)
4254 .nr(8)
4255 .kr(1)
4256 .sr(1)
4257 .m(5)
4258 .n(8)
4259 .k(k)
4260 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004261 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004262 }
4263 }
4264
Marat Dukhande06f492020-04-09 00:19:31 -07004265 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004266 TEST_REQUIRES_ARM_NEON_FMA;
4267 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004268 for (uint32_t n = 1; n <= 8; n++) {
4269 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004270 GemmMicrokernelTester()
4271 .mr(5)
4272 .nr(8)
4273 .kr(1)
4274 .sr(1)
4275 .m(m)
4276 .n(n)
4277 .k(k)
4278 .ks(3)
4279 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004280 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004281 }
4282 }
4283 }
4284 }
4285
Marat Dukhande06f492020-04-09 00:19:31 -07004286 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004287 TEST_REQUIRES_ARM_NEON_FMA;
4288 for (uint32_t n = 9; n < 16; n++) {
4289 for (size_t k = 1; k <= 40; k += 9) {
4290 GemmMicrokernelTester()
4291 .mr(5)
4292 .nr(8)
4293 .kr(1)
4294 .sr(1)
4295 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004296 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004297 .k(k)
4298 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004299 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004300 }
4301 }
4302 }
4303
Marat Dukhande06f492020-04-09 00:19:31 -07004304 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004305 TEST_REQUIRES_ARM_NEON_FMA;
4306 for (uint32_t n = 16; n <= 24; n += 8) {
4307 for (size_t k = 1; k <= 40; k += 9) {
4308 GemmMicrokernelTester()
4309 .mr(5)
4310 .nr(8)
4311 .kr(1)
4312 .sr(1)
4313 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004314 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07004315 .k(k)
4316 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004317 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004318 }
4319 }
4320 }
4321
Marat Dukhande06f492020-04-09 00:19:31 -07004322 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004323 TEST_REQUIRES_ARM_NEON_FMA;
4324 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004325 for (uint32_t n = 1; n <= 8; n++) {
4326 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004327 GemmMicrokernelTester()
4328 .mr(5)
4329 .nr(8)
4330 .kr(1)
4331 .sr(1)
4332 .m(m)
4333 .n(n)
4334 .k(k)
4335 .cm_stride(11)
4336 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004337 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004338 }
4339 }
4340 }
4341 }
4342
Marat Dukhande06f492020-04-09 00:19:31 -07004343 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004344 TEST_REQUIRES_ARM_NEON_FMA;
4345 for (size_t k = 1; k <= 40; k += 9) {
4346 GemmMicrokernelTester()
4347 .mr(5)
4348 .nr(8)
4349 .kr(1)
4350 .sr(1)
4351 .m(5)
4352 .n(8)
4353 .k(k)
4354 .ks(3)
4355 .a_offset(211)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004356 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004357 }
4358 }
4359
Marat Dukhande06f492020-04-09 00:19:31 -07004360 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004361 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004362 for (size_t k = 1; k <= 40; k += 9) {
4363 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004364 GemmMicrokernelTester()
4365 .mr(5)
4366 .nr(8)
4367 .kr(1)
4368 .sr(1)
4369 .m(5)
4370 .n(8)
4371 .k(k)
4372 .ks(3)
4373 .a_offset(211)
4374 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004375 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004376 }
4377 }
4378 }
4379
Marat Dukhande06f492020-04-09 00:19:31 -07004380 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004381 TEST_REQUIRES_ARM_NEON_FMA;
4382 GemmMicrokernelTester()
4383 .mr(5)
4384 .nr(8)
4385 .kr(1)
4386 .sr(1)
4387 .m(5)
4388 .n(8)
4389 .k(8)
4390 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004391 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004392 }
4393
Marat Dukhande06f492020-04-09 00:19:31 -07004394 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004395 TEST_REQUIRES_ARM_NEON_FMA;
4396 GemmMicrokernelTester()
4397 .mr(5)
4398 .nr(8)
4399 .kr(1)
4400 .sr(1)
4401 .m(5)
4402 .n(8)
4403 .k(8)
4404 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004405 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004406 }
4407
Marat Dukhande06f492020-04-09 00:19:31 -07004408 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004409 TEST_REQUIRES_ARM_NEON_FMA;
4410 GemmMicrokernelTester()
4411 .mr(5)
4412 .nr(8)
4413 .kr(1)
4414 .sr(1)
4415 .m(5)
4416 .n(8)
4417 .k(8)
4418 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004419 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004420 }
4421#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4422
4423
4424#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard143a1102021-06-15 09:15:34 -07004425 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
4426 TEST_REQUIRES_ARM_NEON_FMA;
4427 GemmMicrokernelTester()
4428 .mr(5)
4429 .nr(8)
4430 .kr(1)
4431 .sr(1)
4432 .m(5)
4433 .n(8)
4434 .k(8)
4435 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4436 }
4437
4438 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
4439 TEST_REQUIRES_ARM_NEON_FMA;
4440 GemmMicrokernelTester()
4441 .mr(5)
4442 .nr(8)
4443 .kr(1)
4444 .sr(1)
4445 .m(5)
4446 .n(8)
4447 .k(8)
4448 .cn_stride(11)
4449 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4450 }
4451
4452 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
4453 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004454 for (uint32_t n = 1; n <= 8; n++) {
4455 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07004456 GemmMicrokernelTester()
4457 .mr(5)
4458 .nr(8)
4459 .kr(1)
4460 .sr(1)
4461 .m(m)
4462 .n(n)
4463 .k(8)
4464 .iterations(1)
4465 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4466 }
4467 }
4468 }
4469
4470 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
4471 TEST_REQUIRES_ARM_NEON_FMA;
4472 for (uint32_t m = 1; m <= 5; m++) {
4473 GemmMicrokernelTester()
4474 .mr(5)
4475 .nr(8)
4476 .kr(1)
4477 .sr(1)
4478 .m(m)
4479 .n(8)
4480 .k(8)
4481 .iterations(1)
4482 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4483 }
4484 }
4485
4486 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
4487 TEST_REQUIRES_ARM_NEON_FMA;
4488 for (uint32_t n = 1; n <= 8; n++) {
4489 GemmMicrokernelTester()
4490 .mr(5)
4491 .nr(8)
4492 .kr(1)
4493 .sr(1)
4494 .m(5)
4495 .n(n)
4496 .k(8)
4497 .iterations(1)
4498 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4499 }
4500 }
4501
4502 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
4503 TEST_REQUIRES_ARM_NEON_FMA;
4504 GemmMicrokernelTester()
4505 .mr(5)
4506 .nr(8)
4507 .kr(1)
4508 .sr(1)
4509 .m(5)
4510 .n(8)
4511 .k(16)
4512 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4513 }
4514
4515 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
4516 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004517 for (uint32_t n = 1; n <= 8; n++) {
4518 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07004519 GemmMicrokernelTester()
4520 .mr(5)
4521 .nr(8)
4522 .kr(1)
4523 .sr(1)
4524 .m(m)
4525 .n(n)
4526 .k(16)
4527 .iterations(1)
4528 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4529 }
4530 }
4531 }
4532
4533 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
4534 TEST_REQUIRES_ARM_NEON_FMA;
4535 for (size_t k = 1; k < 16; k++) {
4536 GemmMicrokernelTester()
4537 .mr(5)
4538 .nr(8)
4539 .kr(1)
4540 .sr(1)
4541 .m(5)
4542 .n(8)
4543 .k(k)
4544 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4545 }
4546 }
4547
4548 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
4549 TEST_REQUIRES_ARM_NEON_FMA;
4550 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004551 for (uint32_t n = 1; n <= 8; n++) {
4552 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07004553 GemmMicrokernelTester()
4554 .mr(5)
4555 .nr(8)
4556 .kr(1)
4557 .sr(1)
4558 .m(m)
4559 .n(n)
4560 .k(k)
4561 .iterations(1)
4562 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4563 }
4564 }
4565 }
4566 }
4567
4568 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
4569 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004570 for (size_t k = 17; k < 32; k++) {
Frank Barchard143a1102021-06-15 09:15:34 -07004571 GemmMicrokernelTester()
4572 .mr(5)
4573 .nr(8)
4574 .kr(1)
4575 .sr(1)
4576 .m(5)
4577 .n(8)
4578 .k(k)
4579 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4580 }
4581 }
4582
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004583 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
Frank Barchard143a1102021-06-15 09:15:34 -07004584 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08004585 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004586 for (uint32_t n = 1; n <= 8; n++) {
4587 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07004588 GemmMicrokernelTester()
4589 .mr(5)
4590 .nr(8)
4591 .kr(1)
4592 .sr(1)
4593 .m(m)
4594 .n(n)
4595 .k(k)
4596 .iterations(1)
4597 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4598 }
4599 }
4600 }
4601 }
4602
4603 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
4604 TEST_REQUIRES_ARM_NEON_FMA;
4605 for (size_t k = 24; k <= 80; k += 8) {
4606 GemmMicrokernelTester()
4607 .mr(5)
4608 .nr(8)
4609 .kr(1)
4610 .sr(1)
4611 .m(5)
4612 .n(8)
4613 .k(k)
4614 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4615 }
4616 }
4617
4618 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
4619 TEST_REQUIRES_ARM_NEON_FMA;
4620 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004621 for (uint32_t n = 1; n <= 8; n++) {
4622 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07004623 GemmMicrokernelTester()
4624 .mr(5)
4625 .nr(8)
4626 .kr(1)
4627 .sr(1)
4628 .m(m)
4629 .n(n)
4630 .k(k)
4631 .iterations(1)
4632 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4633 }
4634 }
4635 }
4636 }
4637
4638 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
4639 TEST_REQUIRES_ARM_NEON_FMA;
4640 for (uint32_t n = 9; n < 16; n++) {
4641 for (size_t k = 1; k <= 40; k += 9) {
4642 GemmMicrokernelTester()
4643 .mr(5)
4644 .nr(8)
4645 .kr(1)
4646 .sr(1)
4647 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004648 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07004649 .k(k)
4650 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4651 }
4652 }
4653 }
4654
4655 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
4656 TEST_REQUIRES_ARM_NEON_FMA;
4657 for (uint32_t n = 9; n < 16; n++) {
4658 for (size_t k = 1; k <= 40; k += 9) {
4659 GemmMicrokernelTester()
4660 .mr(5)
4661 .nr(8)
4662 .kr(1)
4663 .sr(1)
4664 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004665 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07004666 .k(k)
4667 .cn_stride(11)
4668 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4669 }
4670 }
4671 }
4672
4673 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
4674 TEST_REQUIRES_ARM_NEON_FMA;
4675 for (uint32_t n = 9; n < 16; n++) {
4676 for (size_t k = 1; k <= 40; k += 9) {
4677 for (uint32_t m = 1; m <= 5; m++) {
4678 GemmMicrokernelTester()
4679 .mr(5)
4680 .nr(8)
4681 .kr(1)
4682 .sr(1)
4683 .m(m)
4684 .n(n)
4685 .k(k)
4686 .iterations(1)
4687 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4688 }
4689 }
4690 }
4691 }
4692
4693 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
4694 TEST_REQUIRES_ARM_NEON_FMA;
4695 for (uint32_t n = 16; n <= 24; n += 8) {
4696 for (size_t k = 1; k <= 40; k += 9) {
4697 GemmMicrokernelTester()
4698 .mr(5)
4699 .nr(8)
4700 .kr(1)
4701 .sr(1)
4702 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004703 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07004704 .k(k)
4705 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4706 }
4707 }
4708 }
4709
4710 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
4711 TEST_REQUIRES_ARM_NEON_FMA;
4712 for (uint32_t n = 16; n <= 24; n += 8) {
4713 for (size_t k = 1; k <= 40; k += 9) {
4714 GemmMicrokernelTester()
4715 .mr(5)
4716 .nr(8)
4717 .kr(1)
4718 .sr(1)
4719 .m(5)
4720 .n(n)
4721 .k(k)
4722 .cn_stride(11)
4723 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4724 }
4725 }
4726 }
4727
4728 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
4729 TEST_REQUIRES_ARM_NEON_FMA;
4730 for (uint32_t n = 16; n <= 24; n += 8) {
4731 for (size_t k = 1; k <= 40; k += 9) {
4732 for (uint32_t m = 1; m <= 5; m++) {
4733 GemmMicrokernelTester()
4734 .mr(5)
4735 .nr(8)
4736 .kr(1)
4737 .sr(1)
4738 .m(m)
4739 .n(n)
4740 .k(k)
4741 .iterations(1)
4742 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4743 }
4744 }
4745 }
4746 }
4747
4748 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
4749 TEST_REQUIRES_ARM_NEON_FMA;
4750 for (size_t k = 1; k <= 40; k += 9) {
4751 GemmMicrokernelTester()
4752 .mr(5)
4753 .nr(8)
4754 .kr(1)
4755 .sr(1)
4756 .m(5)
4757 .n(8)
4758 .k(k)
4759 .ks(3)
4760 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4761 }
4762 }
4763
4764 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
4765 TEST_REQUIRES_ARM_NEON_FMA;
4766 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004767 for (uint32_t n = 1; n <= 8; n++) {
4768 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07004769 GemmMicrokernelTester()
4770 .mr(5)
4771 .nr(8)
4772 .kr(1)
4773 .sr(1)
4774 .m(m)
4775 .n(n)
4776 .k(k)
4777 .ks(3)
4778 .iterations(1)
4779 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4780 }
4781 }
4782 }
4783 }
4784
4785 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
4786 TEST_REQUIRES_ARM_NEON_FMA;
4787 for (uint32_t n = 9; n < 16; n++) {
4788 for (size_t k = 1; k <= 40; k += 9) {
4789 GemmMicrokernelTester()
4790 .mr(5)
4791 .nr(8)
4792 .kr(1)
4793 .sr(1)
4794 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004795 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07004796 .k(k)
4797 .ks(3)
4798 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4799 }
4800 }
4801 }
4802
4803 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
4804 TEST_REQUIRES_ARM_NEON_FMA;
4805 for (uint32_t n = 16; n <= 24; n += 8) {
4806 for (size_t k = 1; k <= 40; k += 9) {
4807 GemmMicrokernelTester()
4808 .mr(5)
4809 .nr(8)
4810 .kr(1)
4811 .sr(1)
4812 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004813 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07004814 .k(k)
4815 .ks(3)
4816 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4817 }
4818 }
4819 }
4820
4821 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
4822 TEST_REQUIRES_ARM_NEON_FMA;
4823 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004824 for (uint32_t n = 1; n <= 8; n++) {
4825 for (uint32_t m = 1; m <= 5; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07004826 GemmMicrokernelTester()
4827 .mr(5)
4828 .nr(8)
4829 .kr(1)
4830 .sr(1)
4831 .m(m)
4832 .n(n)
4833 .k(k)
4834 .cm_stride(11)
4835 .iterations(1)
4836 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4837 }
4838 }
4839 }
4840 }
4841
4842 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
4843 TEST_REQUIRES_ARM_NEON_FMA;
4844 for (size_t k = 1; k <= 40; k += 9) {
4845 GemmMicrokernelTester()
4846 .mr(5)
4847 .nr(8)
4848 .kr(1)
4849 .sr(1)
4850 .m(5)
4851 .n(8)
4852 .k(k)
4853 .ks(3)
4854 .a_offset(211)
4855 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4856 }
4857 }
4858
4859 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
4860 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004861 for (size_t k = 1; k <= 40; k += 9) {
4862 for (uint32_t mz = 0; mz < 5; mz++) {
Frank Barchard143a1102021-06-15 09:15:34 -07004863 GemmMicrokernelTester()
4864 .mr(5)
4865 .nr(8)
4866 .kr(1)
4867 .sr(1)
4868 .m(5)
4869 .n(8)
4870 .k(k)
4871 .ks(3)
4872 .a_offset(211)
4873 .zero_index(mz)
4874 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4875 }
4876 }
4877 }
4878
4879 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
4880 TEST_REQUIRES_ARM_NEON_FMA;
4881 GemmMicrokernelTester()
4882 .mr(5)
4883 .nr(8)
4884 .kr(1)
4885 .sr(1)
4886 .m(5)
4887 .n(8)
4888 .k(8)
4889 .qmin(128)
4890 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4891 }
4892
4893 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
4894 TEST_REQUIRES_ARM_NEON_FMA;
4895 GemmMicrokernelTester()
4896 .mr(5)
4897 .nr(8)
4898 .kr(1)
4899 .sr(1)
4900 .m(5)
4901 .n(8)
4902 .k(8)
4903 .qmax(128)
4904 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4905 }
4906
4907 TEST(F32_IGEMM_MINMAX_5X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
4908 TEST_REQUIRES_ARM_NEON_FMA;
4909 GemmMicrokernelTester()
4910 .mr(5)
4911 .nr(8)
4912 .kr(1)
4913 .sr(1)
4914 .m(5)
4915 .n(8)
4916 .k(8)
4917 .cm_stride(11)
4918 .Test(xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
4919 }
4920#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4921
4922
4923#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07004924 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004925 TEST_REQUIRES_ARM_NEON_FMA;
4926 GemmMicrokernelTester()
4927 .mr(6)
4928 .nr(8)
4929 .kr(1)
4930 .sr(1)
4931 .m(6)
4932 .n(8)
4933 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004934 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004935 }
4936
Marat Dukhande06f492020-04-09 00:19:31 -07004937 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004938 TEST_REQUIRES_ARM_NEON_FMA;
4939 GemmMicrokernelTester()
4940 .mr(6)
4941 .nr(8)
4942 .kr(1)
4943 .sr(1)
4944 .m(6)
4945 .n(8)
4946 .k(4)
4947 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004948 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004949 }
4950
Marat Dukhande06f492020-04-09 00:19:31 -07004951 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004952 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004953 for (uint32_t n = 1; n <= 8; n++) {
4954 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004955 GemmMicrokernelTester()
4956 .mr(6)
4957 .nr(8)
4958 .kr(1)
4959 .sr(1)
4960 .m(m)
4961 .n(n)
4962 .k(4)
4963 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004964 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004965 }
4966 }
4967 }
4968
Marat Dukhande06f492020-04-09 00:19:31 -07004969 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004970 TEST_REQUIRES_ARM_NEON_FMA;
4971 for (uint32_t m = 1; m <= 6; m++) {
4972 GemmMicrokernelTester()
4973 .mr(6)
4974 .nr(8)
4975 .kr(1)
4976 .sr(1)
4977 .m(m)
4978 .n(8)
4979 .k(4)
4980 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004981 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004982 }
4983 }
4984
Marat Dukhande06f492020-04-09 00:19:31 -07004985 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07004986 TEST_REQUIRES_ARM_NEON_FMA;
4987 for (uint32_t n = 1; n <= 8; n++) {
4988 GemmMicrokernelTester()
4989 .mr(6)
4990 .nr(8)
4991 .kr(1)
4992 .sr(1)
4993 .m(6)
4994 .n(n)
4995 .k(4)
4996 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07004997 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07004998 }
4999 }
5000
Marat Dukhande06f492020-04-09 00:19:31 -07005001 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005002 TEST_REQUIRES_ARM_NEON_FMA;
5003 GemmMicrokernelTester()
5004 .mr(6)
5005 .nr(8)
5006 .kr(1)
5007 .sr(1)
5008 .m(6)
5009 .n(8)
5010 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005011 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005012 }
5013
Marat Dukhande06f492020-04-09 00:19:31 -07005014 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005015 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005016 for (uint32_t n = 1; n <= 8; n++) {
5017 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005018 GemmMicrokernelTester()
5019 .mr(6)
5020 .nr(8)
5021 .kr(1)
5022 .sr(1)
5023 .m(m)
5024 .n(n)
5025 .k(8)
5026 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005027 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005028 }
5029 }
5030 }
5031
Marat Dukhande06f492020-04-09 00:19:31 -07005032 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005033 TEST_REQUIRES_ARM_NEON_FMA;
5034 for (size_t k = 1; k < 8; k++) {
5035 GemmMicrokernelTester()
5036 .mr(6)
5037 .nr(8)
5038 .kr(1)
5039 .sr(1)
5040 .m(6)
5041 .n(8)
5042 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005043 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005044 }
5045 }
5046
Marat Dukhande06f492020-04-09 00:19:31 -07005047 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005048 TEST_REQUIRES_ARM_NEON_FMA;
5049 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005050 for (uint32_t n = 1; n <= 8; n++) {
5051 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005052 GemmMicrokernelTester()
5053 .mr(6)
5054 .nr(8)
5055 .kr(1)
5056 .sr(1)
5057 .m(m)
5058 .n(n)
5059 .k(k)
5060 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005061 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005062 }
5063 }
5064 }
5065 }
5066
Marat Dukhande06f492020-04-09 00:19:31 -07005067 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005068 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08005069 for (size_t k = 9; k < 16; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005070 GemmMicrokernelTester()
5071 .mr(6)
5072 .nr(8)
5073 .kr(1)
5074 .sr(1)
5075 .m(6)
5076 .n(8)
5077 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005078 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005079 }
5080 }
5081
Zhi An Ngc80ffb02021-12-22 13:06:25 -08005082 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005083 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08005084 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005085 for (uint32_t n = 1; n <= 8; n++) {
5086 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005087 GemmMicrokernelTester()
5088 .mr(6)
5089 .nr(8)
5090 .kr(1)
5091 .sr(1)
5092 .m(m)
5093 .n(n)
5094 .k(k)
5095 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005096 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005097 }
5098 }
5099 }
5100 }
5101
Marat Dukhande06f492020-04-09 00:19:31 -07005102 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005103 TEST_REQUIRES_ARM_NEON_FMA;
5104 for (size_t k = 12; k <= 40; k += 4) {
5105 GemmMicrokernelTester()
5106 .mr(6)
5107 .nr(8)
5108 .kr(1)
5109 .sr(1)
5110 .m(6)
5111 .n(8)
5112 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005113 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005114 }
5115 }
5116
Marat Dukhande06f492020-04-09 00:19:31 -07005117 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005118 TEST_REQUIRES_ARM_NEON_FMA;
5119 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005120 for (uint32_t n = 1; n <= 8; n++) {
5121 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005122 GemmMicrokernelTester()
5123 .mr(6)
5124 .nr(8)
5125 .kr(1)
5126 .sr(1)
5127 .m(m)
5128 .n(n)
5129 .k(k)
5130 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005131 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005132 }
5133 }
5134 }
5135 }
5136
Marat Dukhande06f492020-04-09 00:19:31 -07005137 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005138 TEST_REQUIRES_ARM_NEON_FMA;
5139 for (uint32_t n = 9; n < 16; n++) {
5140 for (size_t k = 1; k <= 20; k += 5) {
5141 GemmMicrokernelTester()
5142 .mr(6)
5143 .nr(8)
5144 .kr(1)
5145 .sr(1)
5146 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005147 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005148 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005149 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005150 }
5151 }
5152 }
5153
Marat Dukhande06f492020-04-09 00:19:31 -07005154 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005155 TEST_REQUIRES_ARM_NEON_FMA;
5156 for (uint32_t n = 9; n < 16; n++) {
5157 for (size_t k = 1; k <= 20; k += 5) {
5158 GemmMicrokernelTester()
5159 .mr(6)
5160 .nr(8)
5161 .kr(1)
5162 .sr(1)
5163 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005164 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005165 .k(k)
5166 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005167 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005168 }
5169 }
5170 }
5171
Marat Dukhande06f492020-04-09 00:19:31 -07005172 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005173 TEST_REQUIRES_ARM_NEON_FMA;
5174 for (uint32_t n = 9; n < 16; n++) {
5175 for (size_t k = 1; k <= 20; k += 5) {
5176 for (uint32_t m = 1; m <= 6; m++) {
5177 GemmMicrokernelTester()
5178 .mr(6)
5179 .nr(8)
5180 .kr(1)
5181 .sr(1)
5182 .m(m)
5183 .n(n)
5184 .k(k)
5185 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005186 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005187 }
5188 }
5189 }
5190 }
5191
Marat Dukhande06f492020-04-09 00:19:31 -07005192 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005193 TEST_REQUIRES_ARM_NEON_FMA;
5194 for (uint32_t n = 16; n <= 24; n += 8) {
5195 for (size_t k = 1; k <= 20; k += 5) {
5196 GemmMicrokernelTester()
5197 .mr(6)
5198 .nr(8)
5199 .kr(1)
5200 .sr(1)
5201 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005202 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005203 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005204 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005205 }
5206 }
5207 }
5208
Marat Dukhande06f492020-04-09 00:19:31 -07005209 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005210 TEST_REQUIRES_ARM_NEON_FMA;
5211 for (uint32_t n = 16; n <= 24; n += 8) {
5212 for (size_t k = 1; k <= 20; k += 5) {
5213 GemmMicrokernelTester()
5214 .mr(6)
5215 .nr(8)
5216 .kr(1)
5217 .sr(1)
5218 .m(6)
5219 .n(n)
5220 .k(k)
5221 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005222 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005223 }
5224 }
5225 }
5226
Marat Dukhande06f492020-04-09 00:19:31 -07005227 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005228 TEST_REQUIRES_ARM_NEON_FMA;
5229 for (uint32_t n = 16; n <= 24; n += 8) {
5230 for (size_t k = 1; k <= 20; k += 5) {
5231 for (uint32_t m = 1; m <= 6; m++) {
5232 GemmMicrokernelTester()
5233 .mr(6)
5234 .nr(8)
5235 .kr(1)
5236 .sr(1)
5237 .m(m)
5238 .n(n)
5239 .k(k)
5240 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005241 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005242 }
5243 }
5244 }
5245 }
5246
Marat Dukhande06f492020-04-09 00:19:31 -07005247 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005248 TEST_REQUIRES_ARM_NEON_FMA;
5249 for (size_t k = 1; k <= 20; k += 5) {
5250 GemmMicrokernelTester()
5251 .mr(6)
5252 .nr(8)
5253 .kr(1)
5254 .sr(1)
5255 .m(6)
5256 .n(8)
5257 .k(k)
5258 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005259 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005260 }
5261 }
5262
Marat Dukhande06f492020-04-09 00:19:31 -07005263 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005264 TEST_REQUIRES_ARM_NEON_FMA;
5265 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005266 for (uint32_t n = 1; n <= 8; n++) {
5267 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005268 GemmMicrokernelTester()
5269 .mr(6)
5270 .nr(8)
5271 .kr(1)
5272 .sr(1)
5273 .m(m)
5274 .n(n)
5275 .k(k)
5276 .ks(3)
5277 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005278 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005279 }
5280 }
5281 }
5282 }
5283
Marat Dukhande06f492020-04-09 00:19:31 -07005284 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005285 TEST_REQUIRES_ARM_NEON_FMA;
5286 for (uint32_t n = 9; n < 16; n++) {
5287 for (size_t k = 1; k <= 20; k += 5) {
5288 GemmMicrokernelTester()
5289 .mr(6)
5290 .nr(8)
5291 .kr(1)
5292 .sr(1)
5293 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005294 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005295 .k(k)
5296 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005297 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005298 }
5299 }
5300 }
5301
Marat Dukhande06f492020-04-09 00:19:31 -07005302 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005303 TEST_REQUIRES_ARM_NEON_FMA;
5304 for (uint32_t n = 16; n <= 24; n += 8) {
5305 for (size_t k = 1; k <= 20; k += 5) {
5306 GemmMicrokernelTester()
5307 .mr(6)
5308 .nr(8)
5309 .kr(1)
5310 .sr(1)
5311 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005312 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005313 .k(k)
5314 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005315 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005316 }
5317 }
5318 }
5319
Marat Dukhande06f492020-04-09 00:19:31 -07005320 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005321 TEST_REQUIRES_ARM_NEON_FMA;
5322 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005323 for (uint32_t n = 1; n <= 8; n++) {
5324 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005325 GemmMicrokernelTester()
5326 .mr(6)
5327 .nr(8)
5328 .kr(1)
5329 .sr(1)
5330 .m(m)
5331 .n(n)
5332 .k(k)
5333 .cm_stride(11)
5334 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005335 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005336 }
5337 }
5338 }
5339 }
5340
Marat Dukhande06f492020-04-09 00:19:31 -07005341 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005342 TEST_REQUIRES_ARM_NEON_FMA;
5343 for (size_t k = 1; k <= 20; k += 5) {
5344 GemmMicrokernelTester()
5345 .mr(6)
5346 .nr(8)
5347 .kr(1)
5348 .sr(1)
5349 .m(6)
5350 .n(8)
5351 .k(k)
5352 .ks(3)
5353 .a_offset(127)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005354 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005355 }
5356 }
5357
Marat Dukhande06f492020-04-09 00:19:31 -07005358 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005359 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005360 for (size_t k = 1; k <= 20; k += 5) {
5361 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005362 GemmMicrokernelTester()
5363 .mr(6)
5364 .nr(8)
5365 .kr(1)
5366 .sr(1)
5367 .m(6)
5368 .n(8)
5369 .k(k)
5370 .ks(3)
5371 .a_offset(127)
5372 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005373 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005374 }
5375 }
5376 }
5377
Marat Dukhande06f492020-04-09 00:19:31 -07005378 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005379 TEST_REQUIRES_ARM_NEON_FMA;
5380 GemmMicrokernelTester()
5381 .mr(6)
5382 .nr(8)
5383 .kr(1)
5384 .sr(1)
5385 .m(6)
5386 .n(8)
5387 .k(4)
5388 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005389 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005390 }
5391
Marat Dukhande06f492020-04-09 00:19:31 -07005392 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005393 TEST_REQUIRES_ARM_NEON_FMA;
5394 GemmMicrokernelTester()
5395 .mr(6)
5396 .nr(8)
5397 .kr(1)
5398 .sr(1)
5399 .m(6)
5400 .n(8)
5401 .k(4)
5402 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005403 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005404 }
5405
Marat Dukhande06f492020-04-09 00:19:31 -07005406 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005407 TEST_REQUIRES_ARM_NEON_FMA;
5408 GemmMicrokernelTester()
5409 .mr(6)
5410 .nr(8)
5411 .kr(1)
5412 .sr(1)
5413 .m(6)
5414 .n(8)
5415 .k(4)
5416 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005417 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005418 }
5419#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5420
5421
5422#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhande06f492020-04-09 00:19:31 -07005423 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005424 TEST_REQUIRES_ARM_NEON_FMA;
5425 GemmMicrokernelTester()
5426 .mr(6)
5427 .nr(8)
5428 .kr(1)
5429 .sr(1)
5430 .m(6)
5431 .n(8)
5432 .k(8)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005433 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005434 }
5435
Marat Dukhande06f492020-04-09 00:19:31 -07005436 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005437 TEST_REQUIRES_ARM_NEON_FMA;
5438 GemmMicrokernelTester()
5439 .mr(6)
5440 .nr(8)
5441 .kr(1)
5442 .sr(1)
5443 .m(6)
5444 .n(8)
5445 .k(8)
5446 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005447 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005448 }
5449
Marat Dukhande06f492020-04-09 00:19:31 -07005450 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005451 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005452 for (uint32_t n = 1; n <= 8; n++) {
5453 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005454 GemmMicrokernelTester()
5455 .mr(6)
5456 .nr(8)
5457 .kr(1)
5458 .sr(1)
5459 .m(m)
5460 .n(n)
5461 .k(8)
5462 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005463 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005464 }
5465 }
5466 }
5467
Marat Dukhande06f492020-04-09 00:19:31 -07005468 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005469 TEST_REQUIRES_ARM_NEON_FMA;
5470 for (uint32_t m = 1; m <= 6; m++) {
5471 GemmMicrokernelTester()
5472 .mr(6)
5473 .nr(8)
5474 .kr(1)
5475 .sr(1)
5476 .m(m)
5477 .n(8)
5478 .k(8)
5479 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005480 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005481 }
5482 }
5483
Marat Dukhande06f492020-04-09 00:19:31 -07005484 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_8_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005485 TEST_REQUIRES_ARM_NEON_FMA;
5486 for (uint32_t n = 1; n <= 8; n++) {
5487 GemmMicrokernelTester()
5488 .mr(6)
5489 .nr(8)
5490 .kr(1)
5491 .sr(1)
5492 .m(6)
5493 .n(n)
5494 .k(8)
5495 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005496 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005497 }
5498 }
5499
Marat Dukhande06f492020-04-09 00:19:31 -07005500 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005501 TEST_REQUIRES_ARM_NEON_FMA;
5502 GemmMicrokernelTester()
5503 .mr(6)
5504 .nr(8)
5505 .kr(1)
5506 .sr(1)
5507 .m(6)
5508 .n(8)
5509 .k(16)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005510 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005511 }
5512
Marat Dukhande06f492020-04-09 00:19:31 -07005513 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_eq_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005514 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005515 for (uint32_t n = 1; n <= 8; n++) {
5516 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005517 GemmMicrokernelTester()
5518 .mr(6)
5519 .nr(8)
5520 .kr(1)
5521 .sr(1)
5522 .m(m)
5523 .n(n)
5524 .k(16)
5525 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005526 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005527 }
5528 }
5529 }
5530
Marat Dukhande06f492020-04-09 00:19:31 -07005531 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_lt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005532 TEST_REQUIRES_ARM_NEON_FMA;
5533 for (size_t k = 1; k < 16; k++) {
5534 GemmMicrokernelTester()
5535 .mr(6)
5536 .nr(8)
5537 .kr(1)
5538 .sr(1)
5539 .m(6)
5540 .n(8)
5541 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005542 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005543 }
5544 }
5545
Marat Dukhande06f492020-04-09 00:19:31 -07005546 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_lt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005547 TEST_REQUIRES_ARM_NEON_FMA;
5548 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005549 for (uint32_t n = 1; n <= 8; n++) {
5550 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005551 GemmMicrokernelTester()
5552 .mr(6)
5553 .nr(8)
5554 .kr(1)
5555 .sr(1)
5556 .m(m)
5557 .n(n)
5558 .k(k)
5559 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005560 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005561 }
5562 }
5563 }
5564 }
5565
Marat Dukhande06f492020-04-09 00:19:31 -07005566 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005567 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08005568 for (size_t k = 17; k < 32; k++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005569 GemmMicrokernelTester()
5570 .mr(6)
5571 .nr(8)
5572 .kr(1)
5573 .sr(1)
5574 .m(6)
5575 .n(8)
5576 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005577 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005578 }
5579 }
5580
Zhi An Ngc80ffb02021-12-22 13:06:25 -08005581 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005582 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08005583 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005584 for (uint32_t n = 1; n <= 8; n++) {
5585 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005586 GemmMicrokernelTester()
5587 .mr(6)
5588 .nr(8)
5589 .kr(1)
5590 .sr(1)
5591 .m(m)
5592 .n(n)
5593 .k(k)
5594 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005595 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005596 }
5597 }
5598 }
5599 }
5600
Marat Dukhande06f492020-04-09 00:19:31 -07005601 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005602 TEST_REQUIRES_ARM_NEON_FMA;
5603 for (size_t k = 24; k <= 80; k += 8) {
5604 GemmMicrokernelTester()
5605 .mr(6)
5606 .nr(8)
5607 .kr(1)
5608 .sr(1)
5609 .m(6)
5610 .n(8)
5611 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005612 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005613 }
5614 }
5615
Marat Dukhande06f492020-04-09 00:19:31 -07005616 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, k_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005617 TEST_REQUIRES_ARM_NEON_FMA;
5618 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005619 for (uint32_t n = 1; n <= 8; n++) {
5620 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005621 GemmMicrokernelTester()
5622 .mr(6)
5623 .nr(8)
5624 .kr(1)
5625 .sr(1)
5626 .m(m)
5627 .n(n)
5628 .k(k)
5629 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005630 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005631 }
5632 }
5633 }
5634 }
5635
Marat Dukhande06f492020-04-09 00:19:31 -07005636 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005637 TEST_REQUIRES_ARM_NEON_FMA;
5638 for (uint32_t n = 9; n < 16; n++) {
5639 for (size_t k = 1; k <= 40; k += 9) {
5640 GemmMicrokernelTester()
5641 .mr(6)
5642 .nr(8)
5643 .kr(1)
5644 .sr(1)
5645 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005646 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005647 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005648 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005649 }
5650 }
5651 }
5652
Marat Dukhande06f492020-04-09 00:19:31 -07005653 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005654 TEST_REQUIRES_ARM_NEON_FMA;
5655 for (uint32_t n = 9; n < 16; n++) {
5656 for (size_t k = 1; k <= 40; k += 9) {
5657 GemmMicrokernelTester()
5658 .mr(6)
5659 .nr(8)
5660 .kr(1)
5661 .sr(1)
5662 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005663 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005664 .k(k)
5665 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005666 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005667 }
5668 }
5669 }
5670
Marat Dukhande06f492020-04-09 00:19:31 -07005671 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005672 TEST_REQUIRES_ARM_NEON_FMA;
5673 for (uint32_t n = 9; n < 16; n++) {
5674 for (size_t k = 1; k <= 40; k += 9) {
5675 for (uint32_t m = 1; m <= 6; m++) {
5676 GemmMicrokernelTester()
5677 .mr(6)
5678 .nr(8)
5679 .kr(1)
5680 .sr(1)
5681 .m(m)
5682 .n(n)
5683 .k(k)
5684 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005685 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005686 }
5687 }
5688 }
5689 }
5690
Marat Dukhande06f492020-04-09 00:19:31 -07005691 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005692 TEST_REQUIRES_ARM_NEON_FMA;
5693 for (uint32_t n = 16; n <= 24; n += 8) {
5694 for (size_t k = 1; k <= 40; k += 9) {
5695 GemmMicrokernelTester()
5696 .mr(6)
5697 .nr(8)
5698 .kr(1)
5699 .sr(1)
5700 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005701 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005702 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005703 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005704 }
5705 }
5706 }
5707
Marat Dukhande06f492020-04-09 00:19:31 -07005708 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005709 TEST_REQUIRES_ARM_NEON_FMA;
5710 for (uint32_t n = 16; n <= 24; n += 8) {
5711 for (size_t k = 1; k <= 40; k += 9) {
5712 GemmMicrokernelTester()
5713 .mr(6)
5714 .nr(8)
5715 .kr(1)
5716 .sr(1)
5717 .m(6)
5718 .n(n)
5719 .k(k)
5720 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005721 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005722 }
5723 }
5724 }
5725
Marat Dukhande06f492020-04-09 00:19:31 -07005726 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005727 TEST_REQUIRES_ARM_NEON_FMA;
5728 for (uint32_t n = 16; n <= 24; n += 8) {
5729 for (size_t k = 1; k <= 40; k += 9) {
5730 for (uint32_t m = 1; m <= 6; m++) {
5731 GemmMicrokernelTester()
5732 .mr(6)
5733 .nr(8)
5734 .kr(1)
5735 .sr(1)
5736 .m(m)
5737 .n(n)
5738 .k(k)
5739 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005740 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005741 }
5742 }
5743 }
5744 }
5745
Marat Dukhande06f492020-04-09 00:19:31 -07005746 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005747 TEST_REQUIRES_ARM_NEON_FMA;
5748 for (size_t k = 1; k <= 40; k += 9) {
5749 GemmMicrokernelTester()
5750 .mr(6)
5751 .nr(8)
5752 .kr(1)
5753 .sr(1)
5754 .m(6)
5755 .n(8)
5756 .k(k)
5757 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005758 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005759 }
5760 }
5761
Marat Dukhande06f492020-04-09 00:19:31 -07005762 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005763 TEST_REQUIRES_ARM_NEON_FMA;
5764 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005765 for (uint32_t n = 1; n <= 8; n++) {
5766 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005767 GemmMicrokernelTester()
5768 .mr(6)
5769 .nr(8)
5770 .kr(1)
5771 .sr(1)
5772 .m(m)
5773 .n(n)
5774 .k(k)
5775 .ks(3)
5776 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005777 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005778 }
5779 }
5780 }
5781 }
5782
Marat Dukhande06f492020-04-09 00:19:31 -07005783 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005784 TEST_REQUIRES_ARM_NEON_FMA;
5785 for (uint32_t n = 9; n < 16; n++) {
5786 for (size_t k = 1; k <= 40; k += 9) {
5787 GemmMicrokernelTester()
5788 .mr(6)
5789 .nr(8)
5790 .kr(1)
5791 .sr(1)
5792 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005793 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005794 .k(k)
5795 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005796 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005797 }
5798 }
5799 }
5800
Marat Dukhande06f492020-04-09 00:19:31 -07005801 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005802 TEST_REQUIRES_ARM_NEON_FMA;
5803 for (uint32_t n = 16; n <= 24; n += 8) {
5804 for (size_t k = 1; k <= 40; k += 9) {
5805 GemmMicrokernelTester()
5806 .mr(6)
5807 .nr(8)
5808 .kr(1)
5809 .sr(1)
5810 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005811 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07005812 .k(k)
5813 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005814 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005815 }
5816 }
5817 }
5818
Marat Dukhande06f492020-04-09 00:19:31 -07005819 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005820 TEST_REQUIRES_ARM_NEON_FMA;
5821 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005822 for (uint32_t n = 1; n <= 8; n++) {
5823 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005824 GemmMicrokernelTester()
5825 .mr(6)
5826 .nr(8)
5827 .kr(1)
5828 .sr(1)
5829 .m(m)
5830 .n(n)
5831 .k(k)
5832 .cm_stride(11)
5833 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005834 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005835 }
5836 }
5837 }
5838 }
5839
Marat Dukhande06f492020-04-09 00:19:31 -07005840 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005841 TEST_REQUIRES_ARM_NEON_FMA;
5842 for (size_t k = 1; k <= 40; k += 9) {
5843 GemmMicrokernelTester()
5844 .mr(6)
5845 .nr(8)
5846 .kr(1)
5847 .sr(1)
5848 .m(6)
5849 .n(8)
5850 .k(k)
5851 .ks(3)
5852 .a_offset(251)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005853 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005854 }
5855 }
5856
Marat Dukhande06f492020-04-09 00:19:31 -07005857 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005858 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005859 for (size_t k = 1; k <= 40; k += 9) {
5860 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005861 GemmMicrokernelTester()
5862 .mr(6)
5863 .nr(8)
5864 .kr(1)
5865 .sr(1)
5866 .m(6)
5867 .n(8)
5868 .k(k)
5869 .ks(3)
5870 .a_offset(251)
5871 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005872 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005873 }
5874 }
5875 }
5876
Marat Dukhande06f492020-04-09 00:19:31 -07005877 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005878 TEST_REQUIRES_ARM_NEON_FMA;
5879 GemmMicrokernelTester()
5880 .mr(6)
5881 .nr(8)
5882 .kr(1)
5883 .sr(1)
5884 .m(6)
5885 .n(8)
5886 .k(8)
5887 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005888 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005889 }
5890
Marat Dukhande06f492020-04-09 00:19:31 -07005891 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005892 TEST_REQUIRES_ARM_NEON_FMA;
5893 GemmMicrokernelTester()
5894 .mr(6)
5895 .nr(8)
5896 .kr(1)
5897 .sr(1)
5898 .m(6)
5899 .n(8)
5900 .k(8)
5901 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005902 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005903 }
5904
Marat Dukhande06f492020-04-09 00:19:31 -07005905 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A73, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07005906 TEST_REQUIRES_ARM_NEON_FMA;
5907 GemmMicrokernelTester()
5908 .mr(6)
5909 .nr(8)
5910 .kr(1)
5911 .sr(1)
5912 .m(6)
5913 .n(8)
5914 .k(8)
5915 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07005916 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07005917 }
5918#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5919
5920
5921#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard143a1102021-06-15 09:15:34 -07005922 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
5923 TEST_REQUIRES_ARM_NEON_FMA;
5924 GemmMicrokernelTester()
5925 .mr(6)
5926 .nr(8)
5927 .kr(1)
5928 .sr(1)
5929 .m(6)
5930 .n(8)
5931 .k(8)
5932 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
5933 }
5934
5935 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
5936 TEST_REQUIRES_ARM_NEON_FMA;
5937 GemmMicrokernelTester()
5938 .mr(6)
5939 .nr(8)
5940 .kr(1)
5941 .sr(1)
5942 .m(6)
5943 .n(8)
5944 .k(8)
5945 .cn_stride(11)
5946 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
5947 }
5948
5949 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
5950 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005951 for (uint32_t n = 1; n <= 8; n++) {
5952 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07005953 GemmMicrokernelTester()
5954 .mr(6)
5955 .nr(8)
5956 .kr(1)
5957 .sr(1)
5958 .m(m)
5959 .n(n)
5960 .k(8)
5961 .iterations(1)
5962 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
5963 }
5964 }
5965 }
5966
5967 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
5968 TEST_REQUIRES_ARM_NEON_FMA;
5969 for (uint32_t m = 1; m <= 6; m++) {
5970 GemmMicrokernelTester()
5971 .mr(6)
5972 .nr(8)
5973 .kr(1)
5974 .sr(1)
5975 .m(m)
5976 .n(8)
5977 .k(8)
5978 .iterations(1)
5979 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
5980 }
5981 }
5982
5983 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
5984 TEST_REQUIRES_ARM_NEON_FMA;
5985 for (uint32_t n = 1; n <= 8; n++) {
5986 GemmMicrokernelTester()
5987 .mr(6)
5988 .nr(8)
5989 .kr(1)
5990 .sr(1)
5991 .m(6)
5992 .n(n)
5993 .k(8)
5994 .iterations(1)
5995 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
5996 }
5997 }
5998
5999 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
6000 TEST_REQUIRES_ARM_NEON_FMA;
6001 GemmMicrokernelTester()
6002 .mr(6)
6003 .nr(8)
6004 .kr(1)
6005 .sr(1)
6006 .m(6)
6007 .n(8)
6008 .k(16)
6009 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6010 }
6011
6012 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
6013 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006014 for (uint32_t n = 1; n <= 8; n++) {
6015 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07006016 GemmMicrokernelTester()
6017 .mr(6)
6018 .nr(8)
6019 .kr(1)
6020 .sr(1)
6021 .m(m)
6022 .n(n)
6023 .k(16)
6024 .iterations(1)
6025 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6026 }
6027 }
6028 }
6029
6030 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
6031 TEST_REQUIRES_ARM_NEON_FMA;
6032 for (size_t k = 1; k < 16; k++) {
6033 GemmMicrokernelTester()
6034 .mr(6)
6035 .nr(8)
6036 .kr(1)
6037 .sr(1)
6038 .m(6)
6039 .n(8)
6040 .k(k)
6041 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6042 }
6043 }
6044
6045 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
6046 TEST_REQUIRES_ARM_NEON_FMA;
6047 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006048 for (uint32_t n = 1; n <= 8; n++) {
6049 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07006050 GemmMicrokernelTester()
6051 .mr(6)
6052 .nr(8)
6053 .kr(1)
6054 .sr(1)
6055 .m(m)
6056 .n(n)
6057 .k(k)
6058 .iterations(1)
6059 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6060 }
6061 }
6062 }
6063 }
6064
6065 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
6066 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08006067 for (size_t k = 17; k < 32; k++) {
Frank Barchard143a1102021-06-15 09:15:34 -07006068 GemmMicrokernelTester()
6069 .mr(6)
6070 .nr(8)
6071 .kr(1)
6072 .sr(1)
6073 .m(6)
6074 .n(8)
6075 .k(k)
6076 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6077 }
6078 }
6079
Zhi An Ngc80ffb02021-12-22 13:06:25 -08006080 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
Frank Barchard143a1102021-06-15 09:15:34 -07006081 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ngc80ffb02021-12-22 13:06:25 -08006082 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006083 for (uint32_t n = 1; n <= 8; n++) {
6084 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07006085 GemmMicrokernelTester()
6086 .mr(6)
6087 .nr(8)
6088 .kr(1)
6089 .sr(1)
6090 .m(m)
6091 .n(n)
6092 .k(k)
6093 .iterations(1)
6094 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6095 }
6096 }
6097 }
6098 }
6099
6100 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
6101 TEST_REQUIRES_ARM_NEON_FMA;
6102 for (size_t k = 24; k <= 80; k += 8) {
6103 GemmMicrokernelTester()
6104 .mr(6)
6105 .nr(8)
6106 .kr(1)
6107 .sr(1)
6108 .m(6)
6109 .n(8)
6110 .k(k)
6111 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6112 }
6113 }
6114
6115 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
6116 TEST_REQUIRES_ARM_NEON_FMA;
6117 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006118 for (uint32_t n = 1; n <= 8; n++) {
6119 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07006120 GemmMicrokernelTester()
6121 .mr(6)
6122 .nr(8)
6123 .kr(1)
6124 .sr(1)
6125 .m(m)
6126 .n(n)
6127 .k(k)
6128 .iterations(1)
6129 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6130 }
6131 }
6132 }
6133 }
6134
6135 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
6136 TEST_REQUIRES_ARM_NEON_FMA;
6137 for (uint32_t n = 9; n < 16; n++) {
6138 for (size_t k = 1; k <= 40; k += 9) {
6139 GemmMicrokernelTester()
6140 .mr(6)
6141 .nr(8)
6142 .kr(1)
6143 .sr(1)
6144 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006145 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07006146 .k(k)
6147 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6148 }
6149 }
6150 }
6151
6152 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
6153 TEST_REQUIRES_ARM_NEON_FMA;
6154 for (uint32_t n = 9; n < 16; n++) {
6155 for (size_t k = 1; k <= 40; k += 9) {
6156 GemmMicrokernelTester()
6157 .mr(6)
6158 .nr(8)
6159 .kr(1)
6160 .sr(1)
6161 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006162 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07006163 .k(k)
6164 .cn_stride(11)
6165 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6166 }
6167 }
6168 }
6169
6170 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
6171 TEST_REQUIRES_ARM_NEON_FMA;
6172 for (uint32_t n = 9; n < 16; n++) {
6173 for (size_t k = 1; k <= 40; k += 9) {
6174 for (uint32_t m = 1; m <= 6; m++) {
6175 GemmMicrokernelTester()
6176 .mr(6)
6177 .nr(8)
6178 .kr(1)
6179 .sr(1)
6180 .m(m)
6181 .n(n)
6182 .k(k)
6183 .iterations(1)
6184 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6185 }
6186 }
6187 }
6188 }
6189
6190 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
6191 TEST_REQUIRES_ARM_NEON_FMA;
6192 for (uint32_t n = 16; n <= 24; n += 8) {
6193 for (size_t k = 1; k <= 40; k += 9) {
6194 GemmMicrokernelTester()
6195 .mr(6)
6196 .nr(8)
6197 .kr(1)
6198 .sr(1)
6199 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006200 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07006201 .k(k)
6202 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6203 }
6204 }
6205 }
6206
6207 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
6208 TEST_REQUIRES_ARM_NEON_FMA;
6209 for (uint32_t n = 16; n <= 24; n += 8) {
6210 for (size_t k = 1; k <= 40; k += 9) {
6211 GemmMicrokernelTester()
6212 .mr(6)
6213 .nr(8)
6214 .kr(1)
6215 .sr(1)
6216 .m(6)
6217 .n(n)
6218 .k(k)
6219 .cn_stride(11)
6220 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6221 }
6222 }
6223 }
6224
6225 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
6226 TEST_REQUIRES_ARM_NEON_FMA;
6227 for (uint32_t n = 16; n <= 24; n += 8) {
6228 for (size_t k = 1; k <= 40; k += 9) {
6229 for (uint32_t m = 1; m <= 6; m++) {
6230 GemmMicrokernelTester()
6231 .mr(6)
6232 .nr(8)
6233 .kr(1)
6234 .sr(1)
6235 .m(m)
6236 .n(n)
6237 .k(k)
6238 .iterations(1)
6239 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6240 }
6241 }
6242 }
6243 }
6244
6245 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
6246 TEST_REQUIRES_ARM_NEON_FMA;
6247 for (size_t k = 1; k <= 40; k += 9) {
6248 GemmMicrokernelTester()
6249 .mr(6)
6250 .nr(8)
6251 .kr(1)
6252 .sr(1)
6253 .m(6)
6254 .n(8)
6255 .k(k)
6256 .ks(3)
6257 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6258 }
6259 }
6260
6261 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
6262 TEST_REQUIRES_ARM_NEON_FMA;
6263 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006264 for (uint32_t n = 1; n <= 8; n++) {
6265 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07006266 GemmMicrokernelTester()
6267 .mr(6)
6268 .nr(8)
6269 .kr(1)
6270 .sr(1)
6271 .m(m)
6272 .n(n)
6273 .k(k)
6274 .ks(3)
6275 .iterations(1)
6276 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6277 }
6278 }
6279 }
6280 }
6281
6282 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
6283 TEST_REQUIRES_ARM_NEON_FMA;
6284 for (uint32_t n = 9; n < 16; n++) {
6285 for (size_t k = 1; k <= 40; k += 9) {
6286 GemmMicrokernelTester()
6287 .mr(6)
6288 .nr(8)
6289 .kr(1)
6290 .sr(1)
6291 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006292 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07006293 .k(k)
6294 .ks(3)
6295 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6296 }
6297 }
6298 }
6299
6300 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
6301 TEST_REQUIRES_ARM_NEON_FMA;
6302 for (uint32_t n = 16; n <= 24; n += 8) {
6303 for (size_t k = 1; k <= 40; k += 9) {
6304 GemmMicrokernelTester()
6305 .mr(6)
6306 .nr(8)
6307 .kr(1)
6308 .sr(1)
6309 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006310 .n(n)
Frank Barchard143a1102021-06-15 09:15:34 -07006311 .k(k)
6312 .ks(3)
6313 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6314 }
6315 }
6316 }
6317
6318 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
6319 TEST_REQUIRES_ARM_NEON_FMA;
6320 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006321 for (uint32_t n = 1; n <= 8; n++) {
6322 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard143a1102021-06-15 09:15:34 -07006323 GemmMicrokernelTester()
6324 .mr(6)
6325 .nr(8)
6326 .kr(1)
6327 .sr(1)
6328 .m(m)
6329 .n(n)
6330 .k(k)
6331 .cm_stride(11)
6332 .iterations(1)
6333 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6334 }
6335 }
6336 }
6337 }
6338
6339 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
6340 TEST_REQUIRES_ARM_NEON_FMA;
6341 for (size_t k = 1; k <= 40; k += 9) {
6342 GemmMicrokernelTester()
6343 .mr(6)
6344 .nr(8)
6345 .kr(1)
6346 .sr(1)
6347 .m(6)
6348 .n(8)
6349 .k(k)
6350 .ks(3)
6351 .a_offset(251)
6352 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6353 }
6354 }
6355
6356 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
6357 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006358 for (size_t k = 1; k <= 40; k += 9) {
6359 for (uint32_t mz = 0; mz < 6; mz++) {
Frank Barchard143a1102021-06-15 09:15:34 -07006360 GemmMicrokernelTester()
6361 .mr(6)
6362 .nr(8)
6363 .kr(1)
6364 .sr(1)
6365 .m(6)
6366 .n(8)
6367 .k(k)
6368 .ks(3)
6369 .a_offset(251)
6370 .zero_index(mz)
6371 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6372 }
6373 }
6374 }
6375
6376 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
6377 TEST_REQUIRES_ARM_NEON_FMA;
6378 GemmMicrokernelTester()
6379 .mr(6)
6380 .nr(8)
6381 .kr(1)
6382 .sr(1)
6383 .m(6)
6384 .n(8)
6385 .k(8)
6386 .qmin(128)
6387 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6388 }
6389
6390 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
6391 TEST_REQUIRES_ARM_NEON_FMA;
6392 GemmMicrokernelTester()
6393 .mr(6)
6394 .nr(8)
6395 .kr(1)
6396 .sr(1)
6397 .m(6)
6398 .n(8)
6399 .k(8)
6400 .qmax(128)
6401 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6402 }
6403
6404 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
6405 TEST_REQUIRES_ARM_NEON_FMA;
6406 GemmMicrokernelTester()
6407 .mr(6)
6408 .nr(8)
6409 .kr(1)
6410 .sr(1)
6411 .m(6)
6412 .n(8)
6413 .k(8)
6414 .cm_stride(11)
6415 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
6416 }
6417#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
6418
6419
Marat Dukhan1c587112020-04-08 20:04:28 -07006420#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07006421 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006422 TEST_REQUIRES_ARM_NEON;
6423 GemmMicrokernelTester()
6424 .mr(4)
6425 .nr(2)
6426 .kr(1)
6427 .sr(1)
6428 .m(4)
6429 .n(2)
6430 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006431 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006432 }
6433
Marat Dukhande06f492020-04-09 00:19:31 -07006434 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006435 TEST_REQUIRES_ARM_NEON;
6436 GemmMicrokernelTester()
6437 .mr(4)
6438 .nr(2)
6439 .kr(1)
6440 .sr(1)
6441 .m(4)
6442 .n(2)
6443 .k(2)
6444 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006445 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006446 }
6447
Marat Dukhande06f492020-04-09 00:19:31 -07006448 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006449 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006450 for (uint32_t n = 1; n <= 2; n++) {
6451 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006452 GemmMicrokernelTester()
6453 .mr(4)
6454 .nr(2)
6455 .kr(1)
6456 .sr(1)
6457 .m(m)
6458 .n(n)
6459 .k(2)
6460 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006461 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006462 }
6463 }
6464 }
6465
Marat Dukhande06f492020-04-09 00:19:31 -07006466 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006467 TEST_REQUIRES_ARM_NEON;
6468 for (uint32_t m = 1; m <= 4; m++) {
6469 GemmMicrokernelTester()
6470 .mr(4)
6471 .nr(2)
6472 .kr(1)
6473 .sr(1)
6474 .m(m)
6475 .n(2)
6476 .k(2)
6477 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006478 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006479 }
6480 }
6481
Marat Dukhande06f492020-04-09 00:19:31 -07006482 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006483 TEST_REQUIRES_ARM_NEON;
6484 for (uint32_t n = 1; n <= 2; n++) {
6485 GemmMicrokernelTester()
6486 .mr(4)
6487 .nr(2)
6488 .kr(1)
6489 .sr(1)
6490 .m(4)
6491 .n(n)
6492 .k(2)
6493 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006494 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006495 }
6496 }
6497
Marat Dukhande06f492020-04-09 00:19:31 -07006498 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006499 TEST_REQUIRES_ARM_NEON;
6500 for (size_t k = 1; k < 2; k++) {
6501 GemmMicrokernelTester()
6502 .mr(4)
6503 .nr(2)
6504 .kr(1)
6505 .sr(1)
6506 .m(4)
6507 .n(2)
6508 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006509 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006510 }
6511 }
6512
Marat Dukhande06f492020-04-09 00:19:31 -07006513 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006514 TEST_REQUIRES_ARM_NEON;
6515 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006516 for (uint32_t n = 1; n <= 2; n++) {
6517 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006518 GemmMicrokernelTester()
6519 .mr(4)
6520 .nr(2)
6521 .kr(1)
6522 .sr(1)
6523 .m(m)
6524 .n(n)
6525 .k(k)
6526 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006527 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006528 }
6529 }
6530 }
6531 }
6532
Marat Dukhande06f492020-04-09 00:19:31 -07006533 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006534 TEST_REQUIRES_ARM_NEON;
6535 for (size_t k = 3; k < 4; k++) {
6536 GemmMicrokernelTester()
6537 .mr(4)
6538 .nr(2)
6539 .kr(1)
6540 .sr(1)
6541 .m(4)
6542 .n(2)
6543 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006544 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006545 }
6546 }
6547
Marat Dukhande06f492020-04-09 00:19:31 -07006548 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006549 TEST_REQUIRES_ARM_NEON;
6550 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006551 for (uint32_t n = 1; n <= 2; n++) {
6552 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006553 GemmMicrokernelTester()
6554 .mr(4)
6555 .nr(2)
6556 .kr(1)
6557 .sr(1)
6558 .m(m)
6559 .n(n)
6560 .k(k)
6561 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006562 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006563 }
6564 }
6565 }
6566 }
6567
Marat Dukhande06f492020-04-09 00:19:31 -07006568 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006569 TEST_REQUIRES_ARM_NEON;
6570 for (size_t k = 4; k <= 20; k += 2) {
6571 GemmMicrokernelTester()
6572 .mr(4)
6573 .nr(2)
6574 .kr(1)
6575 .sr(1)
6576 .m(4)
6577 .n(2)
6578 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006579 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006580 }
6581 }
6582
Marat Dukhande06f492020-04-09 00:19:31 -07006583 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006584 TEST_REQUIRES_ARM_NEON;
6585 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006586 for (uint32_t n = 1; n <= 2; n++) {
6587 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006588 GemmMicrokernelTester()
6589 .mr(4)
6590 .nr(2)
6591 .kr(1)
6592 .sr(1)
6593 .m(m)
6594 .n(n)
6595 .k(k)
6596 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006597 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006598 }
6599 }
6600 }
6601 }
6602
Marat Dukhande06f492020-04-09 00:19:31 -07006603 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006604 TEST_REQUIRES_ARM_NEON;
6605 for (uint32_t n = 3; n < 4; n++) {
6606 for (size_t k = 1; k <= 10; k += 3) {
6607 GemmMicrokernelTester()
6608 .mr(4)
6609 .nr(2)
6610 .kr(1)
6611 .sr(1)
6612 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006613 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006614 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006615 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006616 }
6617 }
6618 }
6619
Marat Dukhande06f492020-04-09 00:19:31 -07006620 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_gt_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006621 TEST_REQUIRES_ARM_NEON;
6622 for (uint32_t n = 3; n < 4; n++) {
6623 for (size_t k = 1; k <= 10; k += 3) {
6624 GemmMicrokernelTester()
6625 .mr(4)
6626 .nr(2)
6627 .kr(1)
6628 .sr(1)
6629 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006630 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006631 .k(k)
6632 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006633 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006634 }
6635 }
6636 }
6637
Marat Dukhande06f492020-04-09 00:19:31 -07006638 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006639 TEST_REQUIRES_ARM_NEON;
6640 for (uint32_t n = 3; n < 4; n++) {
6641 for (size_t k = 1; k <= 10; k += 3) {
6642 for (uint32_t m = 1; m <= 4; m++) {
6643 GemmMicrokernelTester()
6644 .mr(4)
6645 .nr(2)
6646 .kr(1)
6647 .sr(1)
6648 .m(m)
6649 .n(n)
6650 .k(k)
6651 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006652 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006653 }
6654 }
6655 }
6656 }
6657
Marat Dukhande06f492020-04-09 00:19:31 -07006658 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006659 TEST_REQUIRES_ARM_NEON;
6660 for (uint32_t n = 4; n <= 6; n += 2) {
6661 for (size_t k = 1; k <= 10; k += 3) {
6662 GemmMicrokernelTester()
6663 .mr(4)
6664 .nr(2)
6665 .kr(1)
6666 .sr(1)
6667 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006668 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006669 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006670 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006671 }
6672 }
6673 }
6674
Marat Dukhande06f492020-04-09 00:19:31 -07006675 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_div_2_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006676 TEST_REQUIRES_ARM_NEON;
6677 for (uint32_t n = 4; n <= 6; n += 2) {
6678 for (size_t k = 1; k <= 10; k += 3) {
6679 GemmMicrokernelTester()
6680 .mr(4)
6681 .nr(2)
6682 .kr(1)
6683 .sr(1)
6684 .m(4)
6685 .n(n)
6686 .k(k)
6687 .cn_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006688 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006689 }
6690 }
6691 }
6692
Marat Dukhande06f492020-04-09 00:19:31 -07006693 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006694 TEST_REQUIRES_ARM_NEON;
6695 for (uint32_t n = 4; n <= 6; n += 2) {
6696 for (size_t k = 1; k <= 10; k += 3) {
6697 for (uint32_t m = 1; m <= 4; m++) {
6698 GemmMicrokernelTester()
6699 .mr(4)
6700 .nr(2)
6701 .kr(1)
6702 .sr(1)
6703 .m(m)
6704 .n(n)
6705 .k(k)
6706 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006707 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006708 }
6709 }
6710 }
6711 }
6712
Marat Dukhande06f492020-04-09 00:19:31 -07006713 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006714 TEST_REQUIRES_ARM_NEON;
6715 for (size_t k = 1; k <= 10; k += 3) {
6716 GemmMicrokernelTester()
6717 .mr(4)
6718 .nr(2)
6719 .kr(1)
6720 .sr(1)
6721 .m(4)
6722 .n(2)
6723 .k(k)
6724 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006725 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006726 }
6727 }
6728
Marat Dukhande06f492020-04-09 00:19:31 -07006729 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006730 TEST_REQUIRES_ARM_NEON;
6731 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006732 for (uint32_t n = 1; n <= 2; n++) {
6733 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006734 GemmMicrokernelTester()
6735 .mr(4)
6736 .nr(2)
6737 .kr(1)
6738 .sr(1)
6739 .m(m)
6740 .n(n)
6741 .k(k)
6742 .ks(3)
6743 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006744 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006745 }
6746 }
6747 }
6748 }
6749
Marat Dukhande06f492020-04-09 00:19:31 -07006750 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_gt_2_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006751 TEST_REQUIRES_ARM_NEON;
6752 for (uint32_t n = 3; n < 4; n++) {
6753 for (size_t k = 1; k <= 10; k += 3) {
6754 GemmMicrokernelTester()
6755 .mr(4)
6756 .nr(2)
6757 .kr(1)
6758 .sr(1)
6759 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006760 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006761 .k(k)
6762 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006763 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006764 }
6765 }
6766 }
6767
Marat Dukhande06f492020-04-09 00:19:31 -07006768 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, n_div_2_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006769 TEST_REQUIRES_ARM_NEON;
6770 for (uint32_t n = 4; n <= 6; n += 2) {
6771 for (size_t k = 1; k <= 10; k += 3) {
6772 GemmMicrokernelTester()
6773 .mr(4)
6774 .nr(2)
6775 .kr(1)
6776 .sr(1)
6777 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006778 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07006779 .k(k)
6780 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006781 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006782 }
6783 }
6784 }
6785
Marat Dukhande06f492020-04-09 00:19:31 -07006786 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006787 TEST_REQUIRES_ARM_NEON;
6788 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006789 for (uint32_t n = 1; n <= 2; n++) {
6790 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006791 GemmMicrokernelTester()
6792 .mr(4)
6793 .nr(2)
6794 .kr(1)
6795 .sr(1)
6796 .m(m)
6797 .n(n)
6798 .k(k)
6799 .cm_stride(5)
6800 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006801 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006802 }
6803 }
6804 }
6805 }
6806
Marat Dukhande06f492020-04-09 00:19:31 -07006807 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006808 TEST_REQUIRES_ARM_NEON;
6809 for (size_t k = 1; k <= 10; k += 3) {
6810 GemmMicrokernelTester()
6811 .mr(4)
6812 .nr(2)
6813 .kr(1)
6814 .sr(1)
6815 .m(4)
6816 .n(2)
6817 .k(k)
6818 .ks(3)
6819 .a_offset(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006820 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006821 }
6822 }
6823
Marat Dukhande06f492020-04-09 00:19:31 -07006824 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006825 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006826 for (size_t k = 1; k <= 10; k += 3) {
6827 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006828 GemmMicrokernelTester()
6829 .mr(4)
6830 .nr(2)
6831 .kr(1)
6832 .sr(1)
6833 .m(4)
6834 .n(2)
6835 .k(k)
6836 .ks(3)
6837 .a_offset(43)
6838 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006839 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006840 }
6841 }
6842 }
6843
Marat Dukhande06f492020-04-09 00:19:31 -07006844 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006845 TEST_REQUIRES_ARM_NEON;
6846 GemmMicrokernelTester()
6847 .mr(4)
6848 .nr(2)
6849 .kr(1)
6850 .sr(1)
6851 .m(4)
6852 .n(2)
6853 .k(2)
6854 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006855 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006856 }
6857
Marat Dukhande06f492020-04-09 00:19:31 -07006858 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006859 TEST_REQUIRES_ARM_NEON;
6860 GemmMicrokernelTester()
6861 .mr(4)
6862 .nr(2)
6863 .kr(1)
6864 .sr(1)
6865 .m(4)
6866 .n(2)
6867 .k(2)
6868 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006869 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006870 }
6871
Marat Dukhande06f492020-04-09 00:19:31 -07006872 TEST(F32_IGEMM_MINMAX_4X2__NEON_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006873 TEST_REQUIRES_ARM_NEON;
6874 GemmMicrokernelTester()
6875 .mr(4)
6876 .nr(2)
6877 .kr(1)
6878 .sr(1)
6879 .m(4)
6880 .n(2)
6881 .k(2)
6882 .cm_stride(5)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006883 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006884 }
6885#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6886
6887
6888#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07006889 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006890 TEST_REQUIRES_ARM_NEON;
6891 GemmMicrokernelTester()
6892 .mr(4)
6893 .nr(4)
6894 .kr(1)
6895 .sr(1)
6896 .m(4)
6897 .n(4)
6898 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006899 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006900 }
6901
Marat Dukhande06f492020-04-09 00:19:31 -07006902 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006903 TEST_REQUIRES_ARM_NEON;
6904 GemmMicrokernelTester()
6905 .mr(4)
6906 .nr(4)
6907 .kr(1)
6908 .sr(1)
6909 .m(4)
6910 .n(4)
6911 .k(2)
6912 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006913 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006914 }
6915
Marat Dukhande06f492020-04-09 00:19:31 -07006916 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006917 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006918 for (uint32_t n = 1; n <= 4; n++) {
6919 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006920 GemmMicrokernelTester()
6921 .mr(4)
6922 .nr(4)
6923 .kr(1)
6924 .sr(1)
6925 .m(m)
6926 .n(n)
6927 .k(2)
6928 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006929 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006930 }
6931 }
6932 }
6933
Marat Dukhande06f492020-04-09 00:19:31 -07006934 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006935 TEST_REQUIRES_ARM_NEON;
6936 for (uint32_t m = 1; m <= 4; m++) {
6937 GemmMicrokernelTester()
6938 .mr(4)
6939 .nr(4)
6940 .kr(1)
6941 .sr(1)
6942 .m(m)
6943 .n(4)
6944 .k(2)
6945 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006946 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006947 }
6948 }
6949
Marat Dukhande06f492020-04-09 00:19:31 -07006950 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006951 TEST_REQUIRES_ARM_NEON;
6952 for (uint32_t n = 1; n <= 4; n++) {
6953 GemmMicrokernelTester()
6954 .mr(4)
6955 .nr(4)
6956 .kr(1)
6957 .sr(1)
6958 .m(4)
6959 .n(n)
6960 .k(2)
6961 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006962 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006963 }
6964 }
6965
Marat Dukhande06f492020-04-09 00:19:31 -07006966 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006967 TEST_REQUIRES_ARM_NEON;
6968 for (size_t k = 1; k < 2; k++) {
6969 GemmMicrokernelTester()
6970 .mr(4)
6971 .nr(4)
6972 .kr(1)
6973 .sr(1)
6974 .m(4)
6975 .n(4)
6976 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006977 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006978 }
6979 }
6980
Marat Dukhande06f492020-04-09 00:19:31 -07006981 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006982 TEST_REQUIRES_ARM_NEON;
6983 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006984 for (uint32_t n = 1; n <= 4; n++) {
6985 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07006986 GemmMicrokernelTester()
6987 .mr(4)
6988 .nr(4)
6989 .kr(1)
6990 .sr(1)
6991 .m(m)
6992 .n(n)
6993 .k(k)
6994 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07006995 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07006996 }
6997 }
6998 }
6999 }
7000
Marat Dukhande06f492020-04-09 00:19:31 -07007001 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007002 TEST_REQUIRES_ARM_NEON;
7003 for (size_t k = 3; k < 4; k++) {
7004 GemmMicrokernelTester()
7005 .mr(4)
7006 .nr(4)
7007 .kr(1)
7008 .sr(1)
7009 .m(4)
7010 .n(4)
7011 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007012 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007013 }
7014 }
7015
Marat Dukhande06f492020-04-09 00:19:31 -07007016 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007017 TEST_REQUIRES_ARM_NEON;
7018 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007019 for (uint32_t n = 1; n <= 4; n++) {
7020 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007021 GemmMicrokernelTester()
7022 .mr(4)
7023 .nr(4)
7024 .kr(1)
7025 .sr(1)
7026 .m(m)
7027 .n(n)
7028 .k(k)
7029 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007030 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007031 }
7032 }
7033 }
7034 }
7035
Marat Dukhande06f492020-04-09 00:19:31 -07007036 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007037 TEST_REQUIRES_ARM_NEON;
7038 for (size_t k = 4; k <= 20; k += 2) {
7039 GemmMicrokernelTester()
7040 .mr(4)
7041 .nr(4)
7042 .kr(1)
7043 .sr(1)
7044 .m(4)
7045 .n(4)
7046 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007047 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007048 }
7049 }
7050
Marat Dukhande06f492020-04-09 00:19:31 -07007051 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007052 TEST_REQUIRES_ARM_NEON;
7053 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007054 for (uint32_t n = 1; n <= 4; n++) {
7055 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007056 GemmMicrokernelTester()
7057 .mr(4)
7058 .nr(4)
7059 .kr(1)
7060 .sr(1)
7061 .m(m)
7062 .n(n)
7063 .k(k)
7064 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007065 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007066 }
7067 }
7068 }
7069 }
7070
Marat Dukhande06f492020-04-09 00:19:31 -07007071 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007072 TEST_REQUIRES_ARM_NEON;
7073 for (uint32_t n = 5; n < 8; n++) {
7074 for (size_t k = 1; k <= 10; k += 3) {
7075 GemmMicrokernelTester()
7076 .mr(4)
7077 .nr(4)
7078 .kr(1)
7079 .sr(1)
7080 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007081 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007082 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007083 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007084 }
7085 }
7086 }
7087
Marat Dukhande06f492020-04-09 00:19:31 -07007088 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007089 TEST_REQUIRES_ARM_NEON;
7090 for (uint32_t n = 5; n < 8; n++) {
7091 for (size_t k = 1; k <= 10; k += 3) {
7092 GemmMicrokernelTester()
7093 .mr(4)
7094 .nr(4)
7095 .kr(1)
7096 .sr(1)
7097 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007098 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007099 .k(k)
7100 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007101 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007102 }
7103 }
7104 }
7105
Marat Dukhande06f492020-04-09 00:19:31 -07007106 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007107 TEST_REQUIRES_ARM_NEON;
7108 for (uint32_t n = 5; n < 8; n++) {
7109 for (size_t k = 1; k <= 10; k += 3) {
7110 for (uint32_t m = 1; m <= 4; m++) {
7111 GemmMicrokernelTester()
7112 .mr(4)
7113 .nr(4)
7114 .kr(1)
7115 .sr(1)
7116 .m(m)
7117 .n(n)
7118 .k(k)
7119 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007120 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007121 }
7122 }
7123 }
7124 }
7125
Marat Dukhande06f492020-04-09 00:19:31 -07007126 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007127 TEST_REQUIRES_ARM_NEON;
7128 for (uint32_t n = 8; n <= 12; n += 4) {
7129 for (size_t k = 1; k <= 10; k += 3) {
7130 GemmMicrokernelTester()
7131 .mr(4)
7132 .nr(4)
7133 .kr(1)
7134 .sr(1)
7135 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007136 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007137 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007138 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007139 }
7140 }
7141 }
7142
Marat Dukhande06f492020-04-09 00:19:31 -07007143 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007144 TEST_REQUIRES_ARM_NEON;
7145 for (uint32_t n = 8; n <= 12; n += 4) {
7146 for (size_t k = 1; k <= 10; k += 3) {
7147 GemmMicrokernelTester()
7148 .mr(4)
7149 .nr(4)
7150 .kr(1)
7151 .sr(1)
7152 .m(4)
7153 .n(n)
7154 .k(k)
7155 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007156 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007157 }
7158 }
7159 }
7160
Marat Dukhande06f492020-04-09 00:19:31 -07007161 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007162 TEST_REQUIRES_ARM_NEON;
7163 for (uint32_t n = 8; n <= 12; n += 4) {
7164 for (size_t k = 1; k <= 10; k += 3) {
7165 for (uint32_t m = 1; m <= 4; m++) {
7166 GemmMicrokernelTester()
7167 .mr(4)
7168 .nr(4)
7169 .kr(1)
7170 .sr(1)
7171 .m(m)
7172 .n(n)
7173 .k(k)
7174 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007175 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007176 }
7177 }
7178 }
7179 }
7180
Marat Dukhande06f492020-04-09 00:19:31 -07007181 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007182 TEST_REQUIRES_ARM_NEON;
7183 for (size_t k = 1; k <= 10; k += 3) {
7184 GemmMicrokernelTester()
7185 .mr(4)
7186 .nr(4)
7187 .kr(1)
7188 .sr(1)
7189 .m(4)
7190 .n(4)
7191 .k(k)
7192 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007193 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007194 }
7195 }
7196
Marat Dukhande06f492020-04-09 00:19:31 -07007197 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007198 TEST_REQUIRES_ARM_NEON;
7199 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007200 for (uint32_t n = 1; n <= 4; n++) {
7201 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007202 GemmMicrokernelTester()
7203 .mr(4)
7204 .nr(4)
7205 .kr(1)
7206 .sr(1)
7207 .m(m)
7208 .n(n)
7209 .k(k)
7210 .ks(3)
7211 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007212 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007213 }
7214 }
7215 }
7216 }
7217
Marat Dukhande06f492020-04-09 00:19:31 -07007218 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_gt_4_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007219 TEST_REQUIRES_ARM_NEON;
7220 for (uint32_t n = 5; n < 8; n++) {
7221 for (size_t k = 1; k <= 10; k += 3) {
7222 GemmMicrokernelTester()
7223 .mr(4)
7224 .nr(4)
7225 .kr(1)
7226 .sr(1)
7227 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007228 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007229 .k(k)
7230 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007231 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007232 }
7233 }
7234 }
7235
Marat Dukhande06f492020-04-09 00:19:31 -07007236 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, n_div_4_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007237 TEST_REQUIRES_ARM_NEON;
7238 for (uint32_t n = 8; n <= 12; n += 4) {
7239 for (size_t k = 1; k <= 10; k += 3) {
7240 GemmMicrokernelTester()
7241 .mr(4)
7242 .nr(4)
7243 .kr(1)
7244 .sr(1)
7245 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007246 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007247 .k(k)
7248 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007249 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007250 }
7251 }
7252 }
7253
Marat Dukhande06f492020-04-09 00:19:31 -07007254 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007255 TEST_REQUIRES_ARM_NEON;
7256 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007257 for (uint32_t n = 1; n <= 4; n++) {
7258 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007259 GemmMicrokernelTester()
7260 .mr(4)
7261 .nr(4)
7262 .kr(1)
7263 .sr(1)
7264 .m(m)
7265 .n(n)
7266 .k(k)
7267 .cm_stride(7)
7268 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007269 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007270 }
7271 }
7272 }
7273 }
7274
Marat Dukhande06f492020-04-09 00:19:31 -07007275 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007276 TEST_REQUIRES_ARM_NEON;
7277 for (size_t k = 1; k <= 10; k += 3) {
7278 GemmMicrokernelTester()
7279 .mr(4)
7280 .nr(4)
7281 .kr(1)
7282 .sr(1)
7283 .m(4)
7284 .n(4)
7285 .k(k)
7286 .ks(3)
7287 .a_offset(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007288 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007289 }
7290 }
7291
Marat Dukhande06f492020-04-09 00:19:31 -07007292 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007293 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007294 for (size_t k = 1; k <= 10; k += 3) {
7295 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007296 GemmMicrokernelTester()
7297 .mr(4)
7298 .nr(4)
7299 .kr(1)
7300 .sr(1)
7301 .m(4)
7302 .n(4)
7303 .k(k)
7304 .ks(3)
7305 .a_offset(43)
7306 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007307 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007308 }
7309 }
7310 }
7311
Marat Dukhande06f492020-04-09 00:19:31 -07007312 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007313 TEST_REQUIRES_ARM_NEON;
7314 GemmMicrokernelTester()
7315 .mr(4)
7316 .nr(4)
7317 .kr(1)
7318 .sr(1)
7319 .m(4)
7320 .n(4)
7321 .k(2)
7322 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007323 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007324 }
7325
Marat Dukhande06f492020-04-09 00:19:31 -07007326 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007327 TEST_REQUIRES_ARM_NEON;
7328 GemmMicrokernelTester()
7329 .mr(4)
7330 .nr(4)
7331 .kr(1)
7332 .sr(1)
7333 .m(4)
7334 .n(4)
7335 .k(2)
7336 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007337 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007338 }
7339
Marat Dukhande06f492020-04-09 00:19:31 -07007340 TEST(F32_IGEMM_MINMAX_4X4__NEON_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007341 TEST_REQUIRES_ARM_NEON;
7342 GemmMicrokernelTester()
7343 .mr(4)
7344 .nr(4)
7345 .kr(1)
7346 .sr(1)
7347 .m(4)
7348 .n(4)
7349 .k(2)
7350 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007351 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007352 }
7353#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7354
7355
7356#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07007357 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007358 TEST_REQUIRES_ARM_NEON;
7359 GemmMicrokernelTester()
7360 .mr(6)
7361 .nr(8)
7362 .kr(1)
7363 .sr(1)
7364 .m(6)
7365 .n(8)
7366 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007367 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007368 }
7369
Marat Dukhande06f492020-04-09 00:19:31 -07007370 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007371 TEST_REQUIRES_ARM_NEON;
7372 GemmMicrokernelTester()
7373 .mr(6)
7374 .nr(8)
7375 .kr(1)
7376 .sr(1)
7377 .m(6)
7378 .n(8)
7379 .k(2)
7380 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007381 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007382 }
7383
Marat Dukhande06f492020-04-09 00:19:31 -07007384 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007385 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007386 for (uint32_t n = 1; n <= 8; n++) {
7387 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007388 GemmMicrokernelTester()
7389 .mr(6)
7390 .nr(8)
7391 .kr(1)
7392 .sr(1)
7393 .m(m)
7394 .n(n)
7395 .k(2)
7396 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007397 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007398 }
7399 }
7400 }
7401
Marat Dukhande06f492020-04-09 00:19:31 -07007402 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007403 TEST_REQUIRES_ARM_NEON;
7404 for (uint32_t m = 1; m <= 6; m++) {
7405 GemmMicrokernelTester()
7406 .mr(6)
7407 .nr(8)
7408 .kr(1)
7409 .sr(1)
7410 .m(m)
7411 .n(8)
7412 .k(2)
7413 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007414 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007415 }
7416 }
7417
Marat Dukhande06f492020-04-09 00:19:31 -07007418 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007419 TEST_REQUIRES_ARM_NEON;
7420 for (uint32_t n = 1; n <= 8; n++) {
7421 GemmMicrokernelTester()
7422 .mr(6)
7423 .nr(8)
7424 .kr(1)
7425 .sr(1)
7426 .m(6)
7427 .n(n)
7428 .k(2)
7429 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007430 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007431 }
7432 }
7433
Marat Dukhande06f492020-04-09 00:19:31 -07007434 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007435 TEST_REQUIRES_ARM_NEON;
7436 for (size_t k = 1; k < 2; k++) {
7437 GemmMicrokernelTester()
7438 .mr(6)
7439 .nr(8)
7440 .kr(1)
7441 .sr(1)
7442 .m(6)
7443 .n(8)
7444 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007445 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007446 }
7447 }
7448
Marat Dukhande06f492020-04-09 00:19:31 -07007449 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007450 TEST_REQUIRES_ARM_NEON;
7451 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007452 for (uint32_t n = 1; n <= 8; n++) {
7453 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007454 GemmMicrokernelTester()
7455 .mr(6)
7456 .nr(8)
7457 .kr(1)
7458 .sr(1)
7459 .m(m)
7460 .n(n)
7461 .k(k)
7462 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007463 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007464 }
7465 }
7466 }
7467 }
7468
Marat Dukhande06f492020-04-09 00:19:31 -07007469 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007470 TEST_REQUIRES_ARM_NEON;
7471 for (size_t k = 3; k < 4; k++) {
7472 GemmMicrokernelTester()
7473 .mr(6)
7474 .nr(8)
7475 .kr(1)
7476 .sr(1)
7477 .m(6)
7478 .n(8)
7479 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007480 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007481 }
7482 }
7483
Marat Dukhande06f492020-04-09 00:19:31 -07007484 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007485 TEST_REQUIRES_ARM_NEON;
7486 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007487 for (uint32_t n = 1; n <= 8; n++) {
7488 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007489 GemmMicrokernelTester()
7490 .mr(6)
7491 .nr(8)
7492 .kr(1)
7493 .sr(1)
7494 .m(m)
7495 .n(n)
7496 .k(k)
7497 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007498 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007499 }
7500 }
7501 }
7502 }
7503
Marat Dukhande06f492020-04-09 00:19:31 -07007504 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007505 TEST_REQUIRES_ARM_NEON;
7506 for (size_t k = 4; k <= 20; k += 2) {
7507 GemmMicrokernelTester()
7508 .mr(6)
7509 .nr(8)
7510 .kr(1)
7511 .sr(1)
7512 .m(6)
7513 .n(8)
7514 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007515 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007516 }
7517 }
7518
Marat Dukhande06f492020-04-09 00:19:31 -07007519 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007520 TEST_REQUIRES_ARM_NEON;
7521 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007522 for (uint32_t n = 1; n <= 8; n++) {
7523 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007524 GemmMicrokernelTester()
7525 .mr(6)
7526 .nr(8)
7527 .kr(1)
7528 .sr(1)
7529 .m(m)
7530 .n(n)
7531 .k(k)
7532 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007533 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007534 }
7535 }
7536 }
7537 }
7538
Marat Dukhande06f492020-04-09 00:19:31 -07007539 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007540 TEST_REQUIRES_ARM_NEON;
7541 for (uint32_t n = 9; n < 16; n++) {
7542 for (size_t k = 1; k <= 10; k += 3) {
7543 GemmMicrokernelTester()
7544 .mr(6)
7545 .nr(8)
7546 .kr(1)
7547 .sr(1)
7548 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007549 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007550 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007551 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007552 }
7553 }
7554 }
7555
Marat Dukhande06f492020-04-09 00:19:31 -07007556 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007557 TEST_REQUIRES_ARM_NEON;
7558 for (uint32_t n = 9; n < 16; n++) {
7559 for (size_t k = 1; k <= 10; k += 3) {
7560 GemmMicrokernelTester()
7561 .mr(6)
7562 .nr(8)
7563 .kr(1)
7564 .sr(1)
7565 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007566 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007567 .k(k)
7568 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007569 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007570 }
7571 }
7572 }
7573
Marat Dukhande06f492020-04-09 00:19:31 -07007574 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007575 TEST_REQUIRES_ARM_NEON;
7576 for (uint32_t n = 9; n < 16; n++) {
7577 for (size_t k = 1; k <= 10; k += 3) {
7578 for (uint32_t m = 1; m <= 6; m++) {
7579 GemmMicrokernelTester()
7580 .mr(6)
7581 .nr(8)
7582 .kr(1)
7583 .sr(1)
7584 .m(m)
7585 .n(n)
7586 .k(k)
7587 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007588 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007589 }
7590 }
7591 }
7592 }
7593
Marat Dukhande06f492020-04-09 00:19:31 -07007594 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007595 TEST_REQUIRES_ARM_NEON;
7596 for (uint32_t n = 16; n <= 24; n += 8) {
7597 for (size_t k = 1; k <= 10; k += 3) {
7598 GemmMicrokernelTester()
7599 .mr(6)
7600 .nr(8)
7601 .kr(1)
7602 .sr(1)
7603 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007604 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007605 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007606 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007607 }
7608 }
7609 }
7610
Marat Dukhande06f492020-04-09 00:19:31 -07007611 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007612 TEST_REQUIRES_ARM_NEON;
7613 for (uint32_t n = 16; n <= 24; n += 8) {
7614 for (size_t k = 1; k <= 10; k += 3) {
7615 GemmMicrokernelTester()
7616 .mr(6)
7617 .nr(8)
7618 .kr(1)
7619 .sr(1)
7620 .m(6)
7621 .n(n)
7622 .k(k)
7623 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007624 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007625 }
7626 }
7627 }
7628
Marat Dukhande06f492020-04-09 00:19:31 -07007629 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007630 TEST_REQUIRES_ARM_NEON;
7631 for (uint32_t n = 16; n <= 24; n += 8) {
7632 for (size_t k = 1; k <= 10; k += 3) {
7633 for (uint32_t m = 1; m <= 6; m++) {
7634 GemmMicrokernelTester()
7635 .mr(6)
7636 .nr(8)
7637 .kr(1)
7638 .sr(1)
7639 .m(m)
7640 .n(n)
7641 .k(k)
7642 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007643 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007644 }
7645 }
7646 }
7647 }
7648
Marat Dukhande06f492020-04-09 00:19:31 -07007649 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007650 TEST_REQUIRES_ARM_NEON;
7651 for (size_t k = 1; k <= 10; k += 3) {
7652 GemmMicrokernelTester()
7653 .mr(6)
7654 .nr(8)
7655 .kr(1)
7656 .sr(1)
7657 .m(6)
7658 .n(8)
7659 .k(k)
7660 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007661 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007662 }
7663 }
7664
Marat Dukhande06f492020-04-09 00:19:31 -07007665 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007666 TEST_REQUIRES_ARM_NEON;
7667 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007668 for (uint32_t n = 1; n <= 8; n++) {
7669 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007670 GemmMicrokernelTester()
7671 .mr(6)
7672 .nr(8)
7673 .kr(1)
7674 .sr(1)
7675 .m(m)
7676 .n(n)
7677 .k(k)
7678 .ks(3)
7679 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007680 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007681 }
7682 }
7683 }
7684 }
7685
Marat Dukhande06f492020-04-09 00:19:31 -07007686 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007687 TEST_REQUIRES_ARM_NEON;
7688 for (uint32_t n = 9; n < 16; n++) {
7689 for (size_t k = 1; k <= 10; k += 3) {
7690 GemmMicrokernelTester()
7691 .mr(6)
7692 .nr(8)
7693 .kr(1)
7694 .sr(1)
7695 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007696 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007697 .k(k)
7698 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007699 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007700 }
7701 }
7702 }
7703
Marat Dukhande06f492020-04-09 00:19:31 -07007704 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007705 TEST_REQUIRES_ARM_NEON;
7706 for (uint32_t n = 16; n <= 24; n += 8) {
7707 for (size_t k = 1; k <= 10; k += 3) {
7708 GemmMicrokernelTester()
7709 .mr(6)
7710 .nr(8)
7711 .kr(1)
7712 .sr(1)
7713 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007714 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07007715 .k(k)
7716 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007717 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007718 }
7719 }
7720 }
7721
Marat Dukhande06f492020-04-09 00:19:31 -07007722 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007723 TEST_REQUIRES_ARM_NEON;
7724 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007725 for (uint32_t n = 1; n <= 8; n++) {
7726 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007727 GemmMicrokernelTester()
7728 .mr(6)
7729 .nr(8)
7730 .kr(1)
7731 .sr(1)
7732 .m(m)
7733 .n(n)
7734 .k(k)
7735 .cm_stride(11)
7736 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007737 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007738 }
7739 }
7740 }
7741 }
7742
Marat Dukhande06f492020-04-09 00:19:31 -07007743 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007744 TEST_REQUIRES_ARM_NEON;
7745 for (size_t k = 1; k <= 10; k += 3) {
7746 GemmMicrokernelTester()
7747 .mr(6)
7748 .nr(8)
7749 .kr(1)
7750 .sr(1)
7751 .m(6)
7752 .n(8)
7753 .k(k)
7754 .ks(3)
7755 .a_offset(67)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007756 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007757 }
7758 }
7759
Marat Dukhande06f492020-04-09 00:19:31 -07007760 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007761 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007762 for (size_t k = 1; k <= 10; k += 3) {
7763 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007764 GemmMicrokernelTester()
7765 .mr(6)
7766 .nr(8)
7767 .kr(1)
7768 .sr(1)
7769 .m(6)
7770 .n(8)
7771 .k(k)
7772 .ks(3)
7773 .a_offset(67)
7774 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007775 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007776 }
7777 }
7778 }
7779
Marat Dukhande06f492020-04-09 00:19:31 -07007780 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007781 TEST_REQUIRES_ARM_NEON;
7782 GemmMicrokernelTester()
7783 .mr(6)
7784 .nr(8)
7785 .kr(1)
7786 .sr(1)
7787 .m(6)
7788 .n(8)
7789 .k(2)
7790 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007791 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007792 }
7793
Marat Dukhande06f492020-04-09 00:19:31 -07007794 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007795 TEST_REQUIRES_ARM_NEON;
7796 GemmMicrokernelTester()
7797 .mr(6)
7798 .nr(8)
7799 .kr(1)
7800 .sr(1)
7801 .m(6)
7802 .n(8)
7803 .k(2)
7804 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007805 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007806 }
7807
Marat Dukhande06f492020-04-09 00:19:31 -07007808 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007809 TEST_REQUIRES_ARM_NEON;
7810 GemmMicrokernelTester()
7811 .mr(6)
7812 .nr(8)
7813 .kr(1)
7814 .sr(1)
7815 .m(6)
7816 .n(8)
7817 .k(2)
7818 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007819 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007820 }
7821#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7822
7823
Marat Dukhan1c587112020-04-08 20:04:28 -07007824#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07007825 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007826 TEST_REQUIRES_ARM_NEON_FMA;
7827 GemmMicrokernelTester()
7828 .mr(4)
7829 .nr(4)
7830 .kr(1)
7831 .sr(1)
7832 .m(4)
7833 .n(4)
7834 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007835 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007836 }
7837
Marat Dukhande06f492020-04-09 00:19:31 -07007838 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007839 TEST_REQUIRES_ARM_NEON_FMA;
7840 GemmMicrokernelTester()
7841 .mr(4)
7842 .nr(4)
7843 .kr(1)
7844 .sr(1)
7845 .m(4)
7846 .n(4)
7847 .k(2)
7848 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007849 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007850 }
7851
Marat Dukhande06f492020-04-09 00:19:31 -07007852 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007853 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007854 for (uint32_t n = 1; n <= 4; n++) {
7855 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007856 GemmMicrokernelTester()
7857 .mr(4)
7858 .nr(4)
7859 .kr(1)
7860 .sr(1)
7861 .m(m)
7862 .n(n)
7863 .k(2)
7864 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007865 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007866 }
7867 }
7868 }
7869
Marat Dukhande06f492020-04-09 00:19:31 -07007870 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007871 TEST_REQUIRES_ARM_NEON_FMA;
7872 for (uint32_t m = 1; m <= 4; m++) {
7873 GemmMicrokernelTester()
7874 .mr(4)
7875 .nr(4)
7876 .kr(1)
7877 .sr(1)
7878 .m(m)
7879 .n(4)
7880 .k(2)
7881 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007882 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007883 }
7884 }
7885
Marat Dukhande06f492020-04-09 00:19:31 -07007886 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007887 TEST_REQUIRES_ARM_NEON_FMA;
7888 for (uint32_t n = 1; n <= 4; n++) {
7889 GemmMicrokernelTester()
7890 .mr(4)
7891 .nr(4)
7892 .kr(1)
7893 .sr(1)
7894 .m(4)
7895 .n(n)
7896 .k(2)
7897 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007898 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007899 }
7900 }
7901
Marat Dukhande06f492020-04-09 00:19:31 -07007902 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007903 TEST_REQUIRES_ARM_NEON_FMA;
7904 for (size_t k = 1; k < 2; k++) {
7905 GemmMicrokernelTester()
7906 .mr(4)
7907 .nr(4)
7908 .kr(1)
7909 .sr(1)
7910 .m(4)
7911 .n(4)
7912 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007913 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007914 }
7915 }
7916
Marat Dukhande06f492020-04-09 00:19:31 -07007917 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007918 TEST_REQUIRES_ARM_NEON_FMA;
7919 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007920 for (uint32_t n = 1; n <= 4; n++) {
7921 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007922 GemmMicrokernelTester()
7923 .mr(4)
7924 .nr(4)
7925 .kr(1)
7926 .sr(1)
7927 .m(m)
7928 .n(n)
7929 .k(k)
7930 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007931 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007932 }
7933 }
7934 }
7935 }
7936
Marat Dukhande06f492020-04-09 00:19:31 -07007937 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007938 TEST_REQUIRES_ARM_NEON_FMA;
7939 for (size_t k = 3; k < 4; k++) {
7940 GemmMicrokernelTester()
7941 .mr(4)
7942 .nr(4)
7943 .kr(1)
7944 .sr(1)
7945 .m(4)
7946 .n(4)
7947 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007948 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007949 }
7950 }
7951
Marat Dukhande06f492020-04-09 00:19:31 -07007952 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007953 TEST_REQUIRES_ARM_NEON_FMA;
7954 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007955 for (uint32_t n = 1; n <= 4; n++) {
7956 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007957 GemmMicrokernelTester()
7958 .mr(4)
7959 .nr(4)
7960 .kr(1)
7961 .sr(1)
7962 .m(m)
7963 .n(n)
7964 .k(k)
7965 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007966 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007967 }
7968 }
7969 }
7970 }
7971
Marat Dukhande06f492020-04-09 00:19:31 -07007972 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007973 TEST_REQUIRES_ARM_NEON_FMA;
7974 for (size_t k = 4; k <= 20; k += 2) {
7975 GemmMicrokernelTester()
7976 .mr(4)
7977 .nr(4)
7978 .kr(1)
7979 .sr(1)
7980 .m(4)
7981 .n(4)
7982 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07007983 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07007984 }
7985 }
7986
Marat Dukhande06f492020-04-09 00:19:31 -07007987 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007988 TEST_REQUIRES_ARM_NEON_FMA;
7989 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007990 for (uint32_t n = 1; n <= 4; n++) {
7991 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07007992 GemmMicrokernelTester()
7993 .mr(4)
7994 .nr(4)
7995 .kr(1)
7996 .sr(1)
7997 .m(m)
7998 .n(n)
7999 .k(k)
8000 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008001 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008002 }
8003 }
8004 }
8005 }
8006
Marat Dukhande06f492020-04-09 00:19:31 -07008007 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008008 TEST_REQUIRES_ARM_NEON_FMA;
8009 for (uint32_t n = 5; n < 8; n++) {
8010 for (size_t k = 1; k <= 10; k += 3) {
8011 GemmMicrokernelTester()
8012 .mr(4)
8013 .nr(4)
8014 .kr(1)
8015 .sr(1)
8016 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008017 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008018 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008019 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008020 }
8021 }
8022 }
8023
Marat Dukhande06f492020-04-09 00:19:31 -07008024 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008025 TEST_REQUIRES_ARM_NEON_FMA;
8026 for (uint32_t n = 5; n < 8; n++) {
8027 for (size_t k = 1; k <= 10; k += 3) {
8028 GemmMicrokernelTester()
8029 .mr(4)
8030 .nr(4)
8031 .kr(1)
8032 .sr(1)
8033 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008034 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008035 .k(k)
8036 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008037 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008038 }
8039 }
8040 }
8041
Marat Dukhande06f492020-04-09 00:19:31 -07008042 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008043 TEST_REQUIRES_ARM_NEON_FMA;
8044 for (uint32_t n = 5; n < 8; n++) {
8045 for (size_t k = 1; k <= 10; k += 3) {
8046 for (uint32_t m = 1; m <= 4; m++) {
8047 GemmMicrokernelTester()
8048 .mr(4)
8049 .nr(4)
8050 .kr(1)
8051 .sr(1)
8052 .m(m)
8053 .n(n)
8054 .k(k)
8055 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008056 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008057 }
8058 }
8059 }
8060 }
8061
Marat Dukhande06f492020-04-09 00:19:31 -07008062 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008063 TEST_REQUIRES_ARM_NEON_FMA;
8064 for (uint32_t n = 8; n <= 12; n += 4) {
8065 for (size_t k = 1; k <= 10; k += 3) {
8066 GemmMicrokernelTester()
8067 .mr(4)
8068 .nr(4)
8069 .kr(1)
8070 .sr(1)
8071 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008072 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008073 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008074 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008075 }
8076 }
8077 }
8078
Marat Dukhande06f492020-04-09 00:19:31 -07008079 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008080 TEST_REQUIRES_ARM_NEON_FMA;
8081 for (uint32_t n = 8; n <= 12; n += 4) {
8082 for (size_t k = 1; k <= 10; k += 3) {
8083 GemmMicrokernelTester()
8084 .mr(4)
8085 .nr(4)
8086 .kr(1)
8087 .sr(1)
8088 .m(4)
8089 .n(n)
8090 .k(k)
8091 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008092 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008093 }
8094 }
8095 }
8096
Marat Dukhande06f492020-04-09 00:19:31 -07008097 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008098 TEST_REQUIRES_ARM_NEON_FMA;
8099 for (uint32_t n = 8; n <= 12; n += 4) {
8100 for (size_t k = 1; k <= 10; k += 3) {
8101 for (uint32_t m = 1; m <= 4; m++) {
8102 GemmMicrokernelTester()
8103 .mr(4)
8104 .nr(4)
8105 .kr(1)
8106 .sr(1)
8107 .m(m)
8108 .n(n)
8109 .k(k)
8110 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008111 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008112 }
8113 }
8114 }
8115 }
8116
Marat Dukhande06f492020-04-09 00:19:31 -07008117 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008118 TEST_REQUIRES_ARM_NEON_FMA;
8119 for (size_t k = 1; k <= 10; k += 3) {
8120 GemmMicrokernelTester()
8121 .mr(4)
8122 .nr(4)
8123 .kr(1)
8124 .sr(1)
8125 .m(4)
8126 .n(4)
8127 .k(k)
8128 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008129 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008130 }
8131 }
8132
Marat Dukhande06f492020-04-09 00:19:31 -07008133 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008134 TEST_REQUIRES_ARM_NEON_FMA;
8135 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008136 for (uint32_t n = 1; n <= 4; n++) {
8137 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008138 GemmMicrokernelTester()
8139 .mr(4)
8140 .nr(4)
8141 .kr(1)
8142 .sr(1)
8143 .m(m)
8144 .n(n)
8145 .k(k)
8146 .ks(3)
8147 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008148 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008149 }
8150 }
8151 }
8152 }
8153
Marat Dukhande06f492020-04-09 00:19:31 -07008154 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_gt_4_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008155 TEST_REQUIRES_ARM_NEON_FMA;
8156 for (uint32_t n = 5; n < 8; n++) {
8157 for (size_t k = 1; k <= 10; k += 3) {
8158 GemmMicrokernelTester()
8159 .mr(4)
8160 .nr(4)
8161 .kr(1)
8162 .sr(1)
8163 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008164 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008165 .k(k)
8166 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008167 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008168 }
8169 }
8170 }
8171
Marat Dukhande06f492020-04-09 00:19:31 -07008172 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, n_div_4_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008173 TEST_REQUIRES_ARM_NEON_FMA;
8174 for (uint32_t n = 8; n <= 12; n += 4) {
8175 for (size_t k = 1; k <= 10; k += 3) {
8176 GemmMicrokernelTester()
8177 .mr(4)
8178 .nr(4)
8179 .kr(1)
8180 .sr(1)
8181 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008182 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008183 .k(k)
8184 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008185 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008186 }
8187 }
8188 }
8189
Marat Dukhande06f492020-04-09 00:19:31 -07008190 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008191 TEST_REQUIRES_ARM_NEON_FMA;
8192 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008193 for (uint32_t n = 1; n <= 4; n++) {
8194 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008195 GemmMicrokernelTester()
8196 .mr(4)
8197 .nr(4)
8198 .kr(1)
8199 .sr(1)
8200 .m(m)
8201 .n(n)
8202 .k(k)
8203 .cm_stride(7)
8204 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008205 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008206 }
8207 }
8208 }
8209 }
8210
Marat Dukhande06f492020-04-09 00:19:31 -07008211 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008212 TEST_REQUIRES_ARM_NEON_FMA;
8213 for (size_t k = 1; k <= 10; k += 3) {
8214 GemmMicrokernelTester()
8215 .mr(4)
8216 .nr(4)
8217 .kr(1)
8218 .sr(1)
8219 .m(4)
8220 .n(4)
8221 .k(k)
8222 .ks(3)
8223 .a_offset(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008224 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008225 }
8226 }
8227
Marat Dukhande06f492020-04-09 00:19:31 -07008228 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008229 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008230 for (size_t k = 1; k <= 10; k += 3) {
8231 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008232 GemmMicrokernelTester()
8233 .mr(4)
8234 .nr(4)
8235 .kr(1)
8236 .sr(1)
8237 .m(4)
8238 .n(4)
8239 .k(k)
8240 .ks(3)
8241 .a_offset(43)
8242 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008243 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008244 }
8245 }
8246 }
8247
Marat Dukhande06f492020-04-09 00:19:31 -07008248 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008249 TEST_REQUIRES_ARM_NEON_FMA;
8250 GemmMicrokernelTester()
8251 .mr(4)
8252 .nr(4)
8253 .kr(1)
8254 .sr(1)
8255 .m(4)
8256 .n(4)
8257 .k(2)
8258 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008259 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008260 }
8261
Marat Dukhande06f492020-04-09 00:19:31 -07008262 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008263 TEST_REQUIRES_ARM_NEON_FMA;
8264 GemmMicrokernelTester()
8265 .mr(4)
8266 .nr(4)
8267 .kr(1)
8268 .sr(1)
8269 .m(4)
8270 .n(4)
8271 .k(2)
8272 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008273 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008274 }
8275
Marat Dukhande06f492020-04-09 00:19:31 -07008276 TEST(F32_IGEMM_MINMAX_4X4__NEONFMA_LANE_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008277 TEST_REQUIRES_ARM_NEON_FMA;
8278 GemmMicrokernelTester()
8279 .mr(4)
8280 .nr(4)
8281 .kr(1)
8282 .sr(1)
8283 .m(4)
8284 .n(4)
8285 .k(2)
8286 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008287 .Test(xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008288 }
8289#endif // XNN_ARCH_ARM64
8290
8291
8292#if XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07008293 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008294 TEST_REQUIRES_ARM_NEON_FMA;
8295 GemmMicrokernelTester()
8296 .mr(6)
8297 .nr(8)
8298 .kr(1)
8299 .sr(1)
8300 .m(6)
8301 .n(8)
8302 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008303 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008304 }
8305
Marat Dukhande06f492020-04-09 00:19:31 -07008306 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008307 TEST_REQUIRES_ARM_NEON_FMA;
8308 GemmMicrokernelTester()
8309 .mr(6)
8310 .nr(8)
8311 .kr(1)
8312 .sr(1)
8313 .m(6)
8314 .n(8)
8315 .k(4)
8316 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008317 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008318 }
8319
Marat Dukhande06f492020-04-09 00:19:31 -07008320 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008321 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008322 for (uint32_t n = 1; n <= 8; n++) {
8323 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008324 GemmMicrokernelTester()
8325 .mr(6)
8326 .nr(8)
8327 .kr(1)
8328 .sr(1)
8329 .m(m)
8330 .n(n)
8331 .k(4)
8332 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008333 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008334 }
8335 }
8336 }
8337
Marat Dukhande06f492020-04-09 00:19:31 -07008338 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008339 TEST_REQUIRES_ARM_NEON_FMA;
8340 for (uint32_t m = 1; m <= 6; m++) {
8341 GemmMicrokernelTester()
8342 .mr(6)
8343 .nr(8)
8344 .kr(1)
8345 .sr(1)
8346 .m(m)
8347 .n(8)
8348 .k(4)
8349 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008350 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008351 }
8352 }
8353
Marat Dukhande06f492020-04-09 00:19:31 -07008354 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008355 TEST_REQUIRES_ARM_NEON_FMA;
8356 for (uint32_t n = 1; n <= 8; n++) {
8357 GemmMicrokernelTester()
8358 .mr(6)
8359 .nr(8)
8360 .kr(1)
8361 .sr(1)
8362 .m(6)
8363 .n(n)
8364 .k(4)
8365 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008366 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008367 }
8368 }
8369
Marat Dukhande06f492020-04-09 00:19:31 -07008370 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008371 TEST_REQUIRES_ARM_NEON_FMA;
8372 for (size_t k = 1; k < 4; k++) {
8373 GemmMicrokernelTester()
8374 .mr(6)
8375 .nr(8)
8376 .kr(1)
8377 .sr(1)
8378 .m(6)
8379 .n(8)
8380 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008381 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008382 }
8383 }
8384
Marat Dukhande06f492020-04-09 00:19:31 -07008385 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008386 TEST_REQUIRES_ARM_NEON_FMA;
8387 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008388 for (uint32_t n = 1; n <= 8; n++) {
8389 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008390 GemmMicrokernelTester()
8391 .mr(6)
8392 .nr(8)
8393 .kr(1)
8394 .sr(1)
8395 .m(m)
8396 .n(n)
8397 .k(k)
8398 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008399 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008400 }
8401 }
8402 }
8403 }
8404
Marat Dukhande06f492020-04-09 00:19:31 -07008405 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008406 TEST_REQUIRES_ARM_NEON_FMA;
8407 for (size_t k = 5; k < 8; k++) {
8408 GemmMicrokernelTester()
8409 .mr(6)
8410 .nr(8)
8411 .kr(1)
8412 .sr(1)
8413 .m(6)
8414 .n(8)
8415 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008416 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008417 }
8418 }
8419
Marat Dukhande06f492020-04-09 00:19:31 -07008420 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008421 TEST_REQUIRES_ARM_NEON_FMA;
8422 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008423 for (uint32_t n = 1; n <= 8; n++) {
8424 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008425 GemmMicrokernelTester()
8426 .mr(6)
8427 .nr(8)
8428 .kr(1)
8429 .sr(1)
8430 .m(m)
8431 .n(n)
8432 .k(k)
8433 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008434 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008435 }
8436 }
8437 }
8438 }
8439
Marat Dukhande06f492020-04-09 00:19:31 -07008440 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008441 TEST_REQUIRES_ARM_NEON_FMA;
8442 for (size_t k = 8; k <= 40; k += 4) {
8443 GemmMicrokernelTester()
8444 .mr(6)
8445 .nr(8)
8446 .kr(1)
8447 .sr(1)
8448 .m(6)
8449 .n(8)
8450 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008451 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008452 }
8453 }
8454
Marat Dukhande06f492020-04-09 00:19:31 -07008455 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008456 TEST_REQUIRES_ARM_NEON_FMA;
8457 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008458 for (uint32_t n = 1; n <= 8; n++) {
8459 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008460 GemmMicrokernelTester()
8461 .mr(6)
8462 .nr(8)
8463 .kr(1)
8464 .sr(1)
8465 .m(m)
8466 .n(n)
8467 .k(k)
8468 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008469 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008470 }
8471 }
8472 }
8473 }
8474
Marat Dukhande06f492020-04-09 00:19:31 -07008475 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008476 TEST_REQUIRES_ARM_NEON_FMA;
8477 for (uint32_t n = 9; n < 16; n++) {
8478 for (size_t k = 1; k <= 20; k += 5) {
8479 GemmMicrokernelTester()
8480 .mr(6)
8481 .nr(8)
8482 .kr(1)
8483 .sr(1)
8484 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008485 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008486 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008487 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008488 }
8489 }
8490 }
8491
Marat Dukhande06f492020-04-09 00:19:31 -07008492 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008493 TEST_REQUIRES_ARM_NEON_FMA;
8494 for (uint32_t n = 9; n < 16; n++) {
8495 for (size_t k = 1; k <= 20; k += 5) {
8496 GemmMicrokernelTester()
8497 .mr(6)
8498 .nr(8)
8499 .kr(1)
8500 .sr(1)
8501 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008502 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008503 .k(k)
8504 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008505 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008506 }
8507 }
8508 }
8509
Marat Dukhande06f492020-04-09 00:19:31 -07008510 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008511 TEST_REQUIRES_ARM_NEON_FMA;
8512 for (uint32_t n = 9; n < 16; n++) {
8513 for (size_t k = 1; k <= 20; k += 5) {
8514 for (uint32_t m = 1; m <= 6; m++) {
8515 GemmMicrokernelTester()
8516 .mr(6)
8517 .nr(8)
8518 .kr(1)
8519 .sr(1)
8520 .m(m)
8521 .n(n)
8522 .k(k)
8523 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008524 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008525 }
8526 }
8527 }
8528 }
8529
Marat Dukhande06f492020-04-09 00:19:31 -07008530 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008531 TEST_REQUIRES_ARM_NEON_FMA;
8532 for (uint32_t n = 16; n <= 24; n += 8) {
8533 for (size_t k = 1; k <= 20; k += 5) {
8534 GemmMicrokernelTester()
8535 .mr(6)
8536 .nr(8)
8537 .kr(1)
8538 .sr(1)
8539 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008540 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008541 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008542 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008543 }
8544 }
8545 }
8546
Marat Dukhande06f492020-04-09 00:19:31 -07008547 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008548 TEST_REQUIRES_ARM_NEON_FMA;
8549 for (uint32_t n = 16; n <= 24; n += 8) {
8550 for (size_t k = 1; k <= 20; k += 5) {
8551 GemmMicrokernelTester()
8552 .mr(6)
8553 .nr(8)
8554 .kr(1)
8555 .sr(1)
8556 .m(6)
8557 .n(n)
8558 .k(k)
8559 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008560 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008561 }
8562 }
8563 }
8564
Marat Dukhande06f492020-04-09 00:19:31 -07008565 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008566 TEST_REQUIRES_ARM_NEON_FMA;
8567 for (uint32_t n = 16; n <= 24; n += 8) {
8568 for (size_t k = 1; k <= 20; k += 5) {
8569 for (uint32_t m = 1; m <= 6; m++) {
8570 GemmMicrokernelTester()
8571 .mr(6)
8572 .nr(8)
8573 .kr(1)
8574 .sr(1)
8575 .m(m)
8576 .n(n)
8577 .k(k)
8578 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008579 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008580 }
8581 }
8582 }
8583 }
8584
Marat Dukhande06f492020-04-09 00:19:31 -07008585 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008586 TEST_REQUIRES_ARM_NEON_FMA;
8587 for (size_t k = 1; k <= 20; k += 5) {
8588 GemmMicrokernelTester()
8589 .mr(6)
8590 .nr(8)
8591 .kr(1)
8592 .sr(1)
8593 .m(6)
8594 .n(8)
8595 .k(k)
8596 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008597 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008598 }
8599 }
8600
Marat Dukhande06f492020-04-09 00:19:31 -07008601 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008602 TEST_REQUIRES_ARM_NEON_FMA;
8603 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008604 for (uint32_t n = 1; n <= 8; n++) {
8605 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008606 GemmMicrokernelTester()
8607 .mr(6)
8608 .nr(8)
8609 .kr(1)
8610 .sr(1)
8611 .m(m)
8612 .n(n)
8613 .k(k)
8614 .ks(3)
8615 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008616 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008617 }
8618 }
8619 }
8620 }
8621
Marat Dukhande06f492020-04-09 00:19:31 -07008622 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008623 TEST_REQUIRES_ARM_NEON_FMA;
8624 for (uint32_t n = 9; n < 16; n++) {
8625 for (size_t k = 1; k <= 20; k += 5) {
8626 GemmMicrokernelTester()
8627 .mr(6)
8628 .nr(8)
8629 .kr(1)
8630 .sr(1)
8631 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008632 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008633 .k(k)
8634 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008635 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008636 }
8637 }
8638 }
8639
Marat Dukhande06f492020-04-09 00:19:31 -07008640 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008641 TEST_REQUIRES_ARM_NEON_FMA;
8642 for (uint32_t n = 16; n <= 24; n += 8) {
8643 for (size_t k = 1; k <= 20; k += 5) {
8644 GemmMicrokernelTester()
8645 .mr(6)
8646 .nr(8)
8647 .kr(1)
8648 .sr(1)
8649 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008650 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008651 .k(k)
8652 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008653 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008654 }
8655 }
8656 }
8657
Marat Dukhande06f492020-04-09 00:19:31 -07008658 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008659 TEST_REQUIRES_ARM_NEON_FMA;
8660 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008661 for (uint32_t n = 1; n <= 8; n++) {
8662 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008663 GemmMicrokernelTester()
8664 .mr(6)
8665 .nr(8)
8666 .kr(1)
8667 .sr(1)
8668 .m(m)
8669 .n(n)
8670 .k(k)
8671 .cm_stride(11)
8672 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008673 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008674 }
8675 }
8676 }
8677 }
8678
Marat Dukhande06f492020-04-09 00:19:31 -07008679 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008680 TEST_REQUIRES_ARM_NEON_FMA;
8681 for (size_t k = 1; k <= 20; k += 5) {
8682 GemmMicrokernelTester()
8683 .mr(6)
8684 .nr(8)
8685 .kr(1)
8686 .sr(1)
8687 .m(6)
8688 .n(8)
8689 .k(k)
8690 .ks(3)
8691 .a_offset(127)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008692 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008693 }
8694 }
8695
Marat Dukhande06f492020-04-09 00:19:31 -07008696 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008697 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008698 for (size_t k = 1; k <= 20; k += 5) {
8699 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008700 GemmMicrokernelTester()
8701 .mr(6)
8702 .nr(8)
8703 .kr(1)
8704 .sr(1)
8705 .m(6)
8706 .n(8)
8707 .k(k)
8708 .ks(3)
8709 .a_offset(127)
8710 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008711 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008712 }
8713 }
8714 }
8715
Marat Dukhande06f492020-04-09 00:19:31 -07008716 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008717 TEST_REQUIRES_ARM_NEON_FMA;
8718 GemmMicrokernelTester()
8719 .mr(6)
8720 .nr(8)
8721 .kr(1)
8722 .sr(1)
8723 .m(6)
8724 .n(8)
8725 .k(4)
8726 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008727 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008728 }
8729
Marat Dukhande06f492020-04-09 00:19:31 -07008730 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008731 TEST_REQUIRES_ARM_NEON_FMA;
8732 GemmMicrokernelTester()
8733 .mr(6)
8734 .nr(8)
8735 .kr(1)
8736 .sr(1)
8737 .m(6)
8738 .n(8)
8739 .k(4)
8740 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008741 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008742 }
8743
Marat Dukhande06f492020-04-09 00:19:31 -07008744 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008745 TEST_REQUIRES_ARM_NEON_FMA;
8746 GemmMicrokernelTester()
8747 .mr(6)
8748 .nr(8)
8749 .kr(1)
8750 .sr(1)
8751 .m(6)
8752 .n(8)
8753 .k(4)
8754 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008755 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008756 }
8757#endif // XNN_ARCH_ARM64
8758
8759
8760#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07008761 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008762 TEST_REQUIRES_ARM_NEON;
8763 GemmMicrokernelTester()
8764 .mr(1)
8765 .nr(8)
8766 .kr(1)
8767 .sr(1)
8768 .m(1)
8769 .n(8)
8770 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008771 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008772 }
8773
Marat Dukhande06f492020-04-09 00:19:31 -07008774 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008775 TEST_REQUIRES_ARM_NEON;
8776 GemmMicrokernelTester()
8777 .mr(1)
8778 .nr(8)
8779 .kr(1)
8780 .sr(1)
8781 .m(1)
8782 .n(8)
8783 .k(2)
8784 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008785 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008786 }
8787
Marat Dukhande06f492020-04-09 00:19:31 -07008788 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008789 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008790 for (uint32_t n = 1; n <= 8; n++) {
8791 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008792 GemmMicrokernelTester()
8793 .mr(1)
8794 .nr(8)
8795 .kr(1)
8796 .sr(1)
8797 .m(m)
8798 .n(n)
8799 .k(2)
8800 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008801 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008802 }
8803 }
8804 }
8805
Marat Dukhande06f492020-04-09 00:19:31 -07008806 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008807 TEST_REQUIRES_ARM_NEON;
8808 for (uint32_t m = 1; m <= 1; m++) {
8809 GemmMicrokernelTester()
8810 .mr(1)
8811 .nr(8)
8812 .kr(1)
8813 .sr(1)
8814 .m(m)
8815 .n(8)
8816 .k(2)
8817 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008818 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008819 }
8820 }
8821
Marat Dukhande06f492020-04-09 00:19:31 -07008822 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008823 TEST_REQUIRES_ARM_NEON;
8824 for (uint32_t n = 1; n <= 8; n++) {
8825 GemmMicrokernelTester()
8826 .mr(1)
8827 .nr(8)
8828 .kr(1)
8829 .sr(1)
8830 .m(1)
8831 .n(n)
8832 .k(2)
8833 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008834 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008835 }
8836 }
8837
Marat Dukhande06f492020-04-09 00:19:31 -07008838 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008839 TEST_REQUIRES_ARM_NEON;
8840 for (size_t k = 1; k < 2; k++) {
8841 GemmMicrokernelTester()
8842 .mr(1)
8843 .nr(8)
8844 .kr(1)
8845 .sr(1)
8846 .m(1)
8847 .n(8)
8848 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008849 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008850 }
8851 }
8852
Marat Dukhande06f492020-04-09 00:19:31 -07008853 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008854 TEST_REQUIRES_ARM_NEON;
8855 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008856 for (uint32_t n = 1; n <= 8; n++) {
8857 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008858 GemmMicrokernelTester()
8859 .mr(1)
8860 .nr(8)
8861 .kr(1)
8862 .sr(1)
8863 .m(m)
8864 .n(n)
8865 .k(k)
8866 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008867 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008868 }
8869 }
8870 }
8871 }
8872
Marat Dukhande06f492020-04-09 00:19:31 -07008873 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008874 TEST_REQUIRES_ARM_NEON;
8875 for (size_t k = 3; k < 4; k++) {
8876 GemmMicrokernelTester()
8877 .mr(1)
8878 .nr(8)
8879 .kr(1)
8880 .sr(1)
8881 .m(1)
8882 .n(8)
8883 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008884 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008885 }
8886 }
8887
Marat Dukhande06f492020-04-09 00:19:31 -07008888 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008889 TEST_REQUIRES_ARM_NEON;
8890 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008891 for (uint32_t n = 1; n <= 8; n++) {
8892 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008893 GemmMicrokernelTester()
8894 .mr(1)
8895 .nr(8)
8896 .kr(1)
8897 .sr(1)
8898 .m(m)
8899 .n(n)
8900 .k(k)
8901 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008902 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008903 }
8904 }
8905 }
8906 }
8907
Marat Dukhande06f492020-04-09 00:19:31 -07008908 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008909 TEST_REQUIRES_ARM_NEON;
8910 for (size_t k = 4; k <= 20; k += 2) {
8911 GemmMicrokernelTester()
8912 .mr(1)
8913 .nr(8)
8914 .kr(1)
8915 .sr(1)
8916 .m(1)
8917 .n(8)
8918 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008919 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008920 }
8921 }
8922
Marat Dukhande06f492020-04-09 00:19:31 -07008923 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008924 TEST_REQUIRES_ARM_NEON;
8925 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008926 for (uint32_t n = 1; n <= 8; n++) {
8927 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008928 GemmMicrokernelTester()
8929 .mr(1)
8930 .nr(8)
8931 .kr(1)
8932 .sr(1)
8933 .m(m)
8934 .n(n)
8935 .k(k)
8936 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008937 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008938 }
8939 }
8940 }
8941 }
8942
Marat Dukhande06f492020-04-09 00:19:31 -07008943 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008944 TEST_REQUIRES_ARM_NEON;
8945 for (uint32_t n = 9; n < 16; n++) {
8946 for (size_t k = 1; k <= 10; k += 3) {
8947 GemmMicrokernelTester()
8948 .mr(1)
8949 .nr(8)
8950 .kr(1)
8951 .sr(1)
8952 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008953 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008954 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008955 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008956 }
8957 }
8958 }
8959
Marat Dukhande06f492020-04-09 00:19:31 -07008960 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008961 TEST_REQUIRES_ARM_NEON;
8962 for (uint32_t n = 9; n < 16; n++) {
8963 for (size_t k = 1; k <= 10; k += 3) {
8964 GemmMicrokernelTester()
8965 .mr(1)
8966 .nr(8)
8967 .kr(1)
8968 .sr(1)
8969 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008970 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07008971 .k(k)
8972 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008973 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008974 }
8975 }
8976 }
8977
Marat Dukhande06f492020-04-09 00:19:31 -07008978 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008979 TEST_REQUIRES_ARM_NEON;
8980 for (uint32_t n = 9; n < 16; n++) {
8981 for (size_t k = 1; k <= 10; k += 3) {
8982 for (uint32_t m = 1; m <= 1; m++) {
8983 GemmMicrokernelTester()
8984 .mr(1)
8985 .nr(8)
8986 .kr(1)
8987 .sr(1)
8988 .m(m)
8989 .n(n)
8990 .k(k)
8991 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07008992 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07008993 }
8994 }
8995 }
8996 }
8997
Marat Dukhande06f492020-04-09 00:19:31 -07008998 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07008999 TEST_REQUIRES_ARM_NEON;
9000 for (uint32_t n = 16; n <= 24; n += 8) {
9001 for (size_t k = 1; k <= 10; k += 3) {
9002 GemmMicrokernelTester()
9003 .mr(1)
9004 .nr(8)
9005 .kr(1)
9006 .sr(1)
9007 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009008 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009009 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009010 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009011 }
9012 }
9013 }
9014
Marat Dukhande06f492020-04-09 00:19:31 -07009015 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009016 TEST_REQUIRES_ARM_NEON;
9017 for (uint32_t n = 16; n <= 24; n += 8) {
9018 for (size_t k = 1; k <= 10; k += 3) {
9019 GemmMicrokernelTester()
9020 .mr(1)
9021 .nr(8)
9022 .kr(1)
9023 .sr(1)
9024 .m(1)
9025 .n(n)
9026 .k(k)
9027 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009028 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009029 }
9030 }
9031 }
9032
Marat Dukhande06f492020-04-09 00:19:31 -07009033 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009034 TEST_REQUIRES_ARM_NEON;
9035 for (uint32_t n = 16; n <= 24; n += 8) {
9036 for (size_t k = 1; k <= 10; k += 3) {
9037 for (uint32_t m = 1; m <= 1; m++) {
9038 GemmMicrokernelTester()
9039 .mr(1)
9040 .nr(8)
9041 .kr(1)
9042 .sr(1)
9043 .m(m)
9044 .n(n)
9045 .k(k)
9046 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009047 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009048 }
9049 }
9050 }
9051 }
9052
Marat Dukhande06f492020-04-09 00:19:31 -07009053 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009054 TEST_REQUIRES_ARM_NEON;
9055 for (size_t k = 1; k <= 10; k += 3) {
9056 GemmMicrokernelTester()
9057 .mr(1)
9058 .nr(8)
9059 .kr(1)
9060 .sr(1)
9061 .m(1)
9062 .n(8)
9063 .k(k)
9064 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009065 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009066 }
9067 }
9068
Marat Dukhande06f492020-04-09 00:19:31 -07009069 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009070 TEST_REQUIRES_ARM_NEON;
9071 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009072 for (uint32_t n = 1; n <= 8; n++) {
9073 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009074 GemmMicrokernelTester()
9075 .mr(1)
9076 .nr(8)
9077 .kr(1)
9078 .sr(1)
9079 .m(m)
9080 .n(n)
9081 .k(k)
9082 .ks(3)
9083 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009084 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009085 }
9086 }
9087 }
9088 }
9089
Marat Dukhande06f492020-04-09 00:19:31 -07009090 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009091 TEST_REQUIRES_ARM_NEON;
9092 for (uint32_t n = 9; n < 16; n++) {
9093 for (size_t k = 1; k <= 10; k += 3) {
9094 GemmMicrokernelTester()
9095 .mr(1)
9096 .nr(8)
9097 .kr(1)
9098 .sr(1)
9099 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009100 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009101 .k(k)
9102 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009103 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009104 }
9105 }
9106 }
9107
Marat Dukhande06f492020-04-09 00:19:31 -07009108 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009109 TEST_REQUIRES_ARM_NEON;
9110 for (uint32_t n = 16; n <= 24; n += 8) {
9111 for (size_t k = 1; k <= 10; k += 3) {
9112 GemmMicrokernelTester()
9113 .mr(1)
9114 .nr(8)
9115 .kr(1)
9116 .sr(1)
9117 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009118 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009119 .k(k)
9120 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009121 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009122 }
9123 }
9124 }
9125
Marat Dukhande06f492020-04-09 00:19:31 -07009126 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009127 TEST_REQUIRES_ARM_NEON;
9128 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009129 for (uint32_t n = 1; n <= 8; n++) {
9130 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009131 GemmMicrokernelTester()
9132 .mr(1)
9133 .nr(8)
9134 .kr(1)
9135 .sr(1)
9136 .m(m)
9137 .n(n)
9138 .k(k)
9139 .cm_stride(11)
9140 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009141 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009142 }
9143 }
9144 }
9145 }
9146
Marat Dukhande06f492020-04-09 00:19:31 -07009147 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009148 TEST_REQUIRES_ARM_NEON;
9149 for (size_t k = 1; k <= 10; k += 3) {
9150 GemmMicrokernelTester()
9151 .mr(1)
9152 .nr(8)
9153 .kr(1)
9154 .sr(1)
9155 .m(1)
9156 .n(8)
9157 .k(k)
9158 .ks(3)
9159 .a_offset(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009160 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009161 }
9162 }
9163
Marat Dukhande06f492020-04-09 00:19:31 -07009164 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009165 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009166 for (size_t k = 1; k <= 10; k += 3) {
9167 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009168 GemmMicrokernelTester()
9169 .mr(1)
9170 .nr(8)
9171 .kr(1)
9172 .sr(1)
9173 .m(1)
9174 .n(8)
9175 .k(k)
9176 .ks(3)
9177 .a_offset(13)
9178 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009179 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009180 }
9181 }
9182 }
9183
Marat Dukhande06f492020-04-09 00:19:31 -07009184 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009185 TEST_REQUIRES_ARM_NEON;
9186 GemmMicrokernelTester()
9187 .mr(1)
9188 .nr(8)
9189 .kr(1)
9190 .sr(1)
9191 .m(1)
9192 .n(8)
9193 .k(2)
9194 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009195 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009196 }
9197
Marat Dukhande06f492020-04-09 00:19:31 -07009198 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009199 TEST_REQUIRES_ARM_NEON;
9200 GemmMicrokernelTester()
9201 .mr(1)
9202 .nr(8)
9203 .kr(1)
9204 .sr(1)
9205 .m(1)
9206 .n(8)
9207 .k(2)
9208 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009209 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009210 }
9211
Marat Dukhande06f492020-04-09 00:19:31 -07009212 TEST(F32_IGEMM_MINMAX_1X8__NEON_DUP_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009213 TEST_REQUIRES_ARM_NEON;
9214 GemmMicrokernelTester()
9215 .mr(1)
9216 .nr(8)
9217 .kr(1)
9218 .sr(1)
9219 .m(1)
9220 .n(8)
9221 .k(2)
9222 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009223 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009224 }
9225#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9226
9227
9228#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07009229 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009230 TEST_REQUIRES_ARM_NEON;
9231 GemmMicrokernelTester()
9232 .mr(6)
9233 .nr(8)
9234 .kr(1)
9235 .sr(1)
9236 .m(6)
9237 .n(8)
9238 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009239 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009240 }
9241
Marat Dukhande06f492020-04-09 00:19:31 -07009242 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009243 TEST_REQUIRES_ARM_NEON;
9244 GemmMicrokernelTester()
9245 .mr(6)
9246 .nr(8)
9247 .kr(1)
9248 .sr(1)
9249 .m(6)
9250 .n(8)
9251 .k(2)
9252 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009253 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009254 }
9255
Marat Dukhande06f492020-04-09 00:19:31 -07009256 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009257 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009258 for (uint32_t n = 1; n <= 8; n++) {
9259 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009260 GemmMicrokernelTester()
9261 .mr(6)
9262 .nr(8)
9263 .kr(1)
9264 .sr(1)
9265 .m(m)
9266 .n(n)
9267 .k(2)
9268 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009269 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009270 }
9271 }
9272 }
9273
Marat Dukhande06f492020-04-09 00:19:31 -07009274 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009275 TEST_REQUIRES_ARM_NEON;
9276 for (uint32_t m = 1; m <= 6; m++) {
9277 GemmMicrokernelTester()
9278 .mr(6)
9279 .nr(8)
9280 .kr(1)
9281 .sr(1)
9282 .m(m)
9283 .n(8)
9284 .k(2)
9285 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009286 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009287 }
9288 }
9289
Marat Dukhande06f492020-04-09 00:19:31 -07009290 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009291 TEST_REQUIRES_ARM_NEON;
9292 for (uint32_t n = 1; n <= 8; n++) {
9293 GemmMicrokernelTester()
9294 .mr(6)
9295 .nr(8)
9296 .kr(1)
9297 .sr(1)
9298 .m(6)
9299 .n(n)
9300 .k(2)
9301 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009302 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009303 }
9304 }
9305
Marat Dukhande06f492020-04-09 00:19:31 -07009306 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009307 TEST_REQUIRES_ARM_NEON;
9308 for (size_t k = 1; k < 2; k++) {
9309 GemmMicrokernelTester()
9310 .mr(6)
9311 .nr(8)
9312 .kr(1)
9313 .sr(1)
9314 .m(6)
9315 .n(8)
9316 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009317 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009318 }
9319 }
9320
Marat Dukhande06f492020-04-09 00:19:31 -07009321 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009322 TEST_REQUIRES_ARM_NEON;
9323 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009324 for (uint32_t n = 1; n <= 8; n++) {
9325 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009326 GemmMicrokernelTester()
9327 .mr(6)
9328 .nr(8)
9329 .kr(1)
9330 .sr(1)
9331 .m(m)
9332 .n(n)
9333 .k(k)
9334 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009335 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009336 }
9337 }
9338 }
9339 }
9340
Marat Dukhande06f492020-04-09 00:19:31 -07009341 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009342 TEST_REQUIRES_ARM_NEON;
9343 for (size_t k = 3; k < 4; k++) {
9344 GemmMicrokernelTester()
9345 .mr(6)
9346 .nr(8)
9347 .kr(1)
9348 .sr(1)
9349 .m(6)
9350 .n(8)
9351 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009352 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009353 }
9354 }
9355
Marat Dukhande06f492020-04-09 00:19:31 -07009356 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009357 TEST_REQUIRES_ARM_NEON;
9358 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009359 for (uint32_t n = 1; n <= 8; n++) {
9360 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009361 GemmMicrokernelTester()
9362 .mr(6)
9363 .nr(8)
9364 .kr(1)
9365 .sr(1)
9366 .m(m)
9367 .n(n)
9368 .k(k)
9369 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009370 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009371 }
9372 }
9373 }
9374 }
9375
Marat Dukhande06f492020-04-09 00:19:31 -07009376 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009377 TEST_REQUIRES_ARM_NEON;
9378 for (size_t k = 4; k <= 20; k += 2) {
9379 GemmMicrokernelTester()
9380 .mr(6)
9381 .nr(8)
9382 .kr(1)
9383 .sr(1)
9384 .m(6)
9385 .n(8)
9386 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009387 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009388 }
9389 }
9390
Marat Dukhande06f492020-04-09 00:19:31 -07009391 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009392 TEST_REQUIRES_ARM_NEON;
9393 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009394 for (uint32_t n = 1; n <= 8; n++) {
9395 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009396 GemmMicrokernelTester()
9397 .mr(6)
9398 .nr(8)
9399 .kr(1)
9400 .sr(1)
9401 .m(m)
9402 .n(n)
9403 .k(k)
9404 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009405 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009406 }
9407 }
9408 }
9409 }
9410
Marat Dukhande06f492020-04-09 00:19:31 -07009411 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009412 TEST_REQUIRES_ARM_NEON;
9413 for (uint32_t n = 9; n < 16; n++) {
9414 for (size_t k = 1; k <= 10; k += 3) {
9415 GemmMicrokernelTester()
9416 .mr(6)
9417 .nr(8)
9418 .kr(1)
9419 .sr(1)
9420 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009421 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009422 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009423 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009424 }
9425 }
9426 }
9427
Marat Dukhande06f492020-04-09 00:19:31 -07009428 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009429 TEST_REQUIRES_ARM_NEON;
9430 for (uint32_t n = 9; n < 16; n++) {
9431 for (size_t k = 1; k <= 10; k += 3) {
9432 GemmMicrokernelTester()
9433 .mr(6)
9434 .nr(8)
9435 .kr(1)
9436 .sr(1)
9437 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009438 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009439 .k(k)
9440 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009441 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009442 }
9443 }
9444 }
9445
Marat Dukhande06f492020-04-09 00:19:31 -07009446 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009447 TEST_REQUIRES_ARM_NEON;
9448 for (uint32_t n = 9; n < 16; n++) {
9449 for (size_t k = 1; k <= 10; k += 3) {
9450 for (uint32_t m = 1; m <= 6; m++) {
9451 GemmMicrokernelTester()
9452 .mr(6)
9453 .nr(8)
9454 .kr(1)
9455 .sr(1)
9456 .m(m)
9457 .n(n)
9458 .k(k)
9459 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009460 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009461 }
9462 }
9463 }
9464 }
9465
Marat Dukhande06f492020-04-09 00:19:31 -07009466 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009467 TEST_REQUIRES_ARM_NEON;
9468 for (uint32_t n = 16; n <= 24; n += 8) {
9469 for (size_t k = 1; k <= 10; k += 3) {
9470 GemmMicrokernelTester()
9471 .mr(6)
9472 .nr(8)
9473 .kr(1)
9474 .sr(1)
9475 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009476 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009477 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009478 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009479 }
9480 }
9481 }
9482
Marat Dukhande06f492020-04-09 00:19:31 -07009483 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009484 TEST_REQUIRES_ARM_NEON;
9485 for (uint32_t n = 16; n <= 24; n += 8) {
9486 for (size_t k = 1; k <= 10; k += 3) {
9487 GemmMicrokernelTester()
9488 .mr(6)
9489 .nr(8)
9490 .kr(1)
9491 .sr(1)
9492 .m(6)
9493 .n(n)
9494 .k(k)
9495 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009496 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009497 }
9498 }
9499 }
9500
Marat Dukhande06f492020-04-09 00:19:31 -07009501 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009502 TEST_REQUIRES_ARM_NEON;
9503 for (uint32_t n = 16; n <= 24; n += 8) {
9504 for (size_t k = 1; k <= 10; k += 3) {
9505 for (uint32_t m = 1; m <= 6; m++) {
9506 GemmMicrokernelTester()
9507 .mr(6)
9508 .nr(8)
9509 .kr(1)
9510 .sr(1)
9511 .m(m)
9512 .n(n)
9513 .k(k)
9514 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009515 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009516 }
9517 }
9518 }
9519 }
9520
Marat Dukhande06f492020-04-09 00:19:31 -07009521 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009522 TEST_REQUIRES_ARM_NEON;
9523 for (size_t k = 1; k <= 10; k += 3) {
9524 GemmMicrokernelTester()
9525 .mr(6)
9526 .nr(8)
9527 .kr(1)
9528 .sr(1)
9529 .m(6)
9530 .n(8)
9531 .k(k)
9532 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009533 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009534 }
9535 }
9536
Marat Dukhande06f492020-04-09 00:19:31 -07009537 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009538 TEST_REQUIRES_ARM_NEON;
9539 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009540 for (uint32_t n = 1; n <= 8; n++) {
9541 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009542 GemmMicrokernelTester()
9543 .mr(6)
9544 .nr(8)
9545 .kr(1)
9546 .sr(1)
9547 .m(m)
9548 .n(n)
9549 .k(k)
9550 .ks(3)
9551 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009552 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009553 }
9554 }
9555 }
9556 }
9557
Marat Dukhande06f492020-04-09 00:19:31 -07009558 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009559 TEST_REQUIRES_ARM_NEON;
9560 for (uint32_t n = 9; n < 16; n++) {
9561 for (size_t k = 1; k <= 10; k += 3) {
9562 GemmMicrokernelTester()
9563 .mr(6)
9564 .nr(8)
9565 .kr(1)
9566 .sr(1)
9567 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009568 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009569 .k(k)
9570 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009571 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009572 }
9573 }
9574 }
9575
Marat Dukhande06f492020-04-09 00:19:31 -07009576 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009577 TEST_REQUIRES_ARM_NEON;
9578 for (uint32_t n = 16; n <= 24; n += 8) {
9579 for (size_t k = 1; k <= 10; k += 3) {
9580 GemmMicrokernelTester()
9581 .mr(6)
9582 .nr(8)
9583 .kr(1)
9584 .sr(1)
9585 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009586 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009587 .k(k)
9588 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009589 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009590 }
9591 }
9592 }
9593
Marat Dukhande06f492020-04-09 00:19:31 -07009594 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009595 TEST_REQUIRES_ARM_NEON;
9596 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009597 for (uint32_t n = 1; n <= 8; n++) {
9598 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009599 GemmMicrokernelTester()
9600 .mr(6)
9601 .nr(8)
9602 .kr(1)
9603 .sr(1)
9604 .m(m)
9605 .n(n)
9606 .k(k)
9607 .cm_stride(11)
9608 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009609 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009610 }
9611 }
9612 }
9613 }
9614
Marat Dukhande06f492020-04-09 00:19:31 -07009615 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009616 TEST_REQUIRES_ARM_NEON;
9617 for (size_t k = 1; k <= 10; k += 3) {
9618 GemmMicrokernelTester()
9619 .mr(6)
9620 .nr(8)
9621 .kr(1)
9622 .sr(1)
9623 .m(6)
9624 .n(8)
9625 .k(k)
9626 .ks(3)
9627 .a_offset(67)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009628 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009629 }
9630 }
9631
Marat Dukhande06f492020-04-09 00:19:31 -07009632 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009633 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009634 for (size_t k = 1; k <= 10; k += 3) {
9635 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009636 GemmMicrokernelTester()
9637 .mr(6)
9638 .nr(8)
9639 .kr(1)
9640 .sr(1)
9641 .m(6)
9642 .n(8)
9643 .k(k)
9644 .ks(3)
9645 .a_offset(67)
9646 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009647 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009648 }
9649 }
9650 }
9651
Marat Dukhande06f492020-04-09 00:19:31 -07009652 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009653 TEST_REQUIRES_ARM_NEON;
9654 GemmMicrokernelTester()
9655 .mr(6)
9656 .nr(8)
9657 .kr(1)
9658 .sr(1)
9659 .m(6)
9660 .n(8)
9661 .k(2)
9662 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009663 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009664 }
9665
Marat Dukhande06f492020-04-09 00:19:31 -07009666 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009667 TEST_REQUIRES_ARM_NEON;
9668 GemmMicrokernelTester()
9669 .mr(6)
9670 .nr(8)
9671 .kr(1)
9672 .sr(1)
9673 .m(6)
9674 .n(8)
9675 .k(2)
9676 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009677 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009678 }
9679
Marat Dukhande06f492020-04-09 00:19:31 -07009680 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009681 TEST_REQUIRES_ARM_NEON;
9682 GemmMicrokernelTester()
9683 .mr(6)
9684 .nr(8)
9685 .kr(1)
9686 .sr(1)
9687 .m(6)
9688 .n(8)
9689 .k(2)
9690 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009691 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009692 }
9693#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9694
9695
9696#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -07009697 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009698 TEST_REQUIRES_ARM_NEON;
9699 GemmMicrokernelTester()
9700 .mr(6)
9701 .nr(8)
9702 .kr(1)
9703 .sr(1)
9704 .m(6)
9705 .n(8)
9706 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009707 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009708 }
9709
Marat Dukhande06f492020-04-09 00:19:31 -07009710 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009711 TEST_REQUIRES_ARM_NEON;
9712 GemmMicrokernelTester()
9713 .mr(6)
9714 .nr(8)
9715 .kr(1)
9716 .sr(1)
9717 .m(6)
9718 .n(8)
9719 .k(4)
9720 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009721 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009722 }
9723
Marat Dukhande06f492020-04-09 00:19:31 -07009724 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009725 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009726 for (uint32_t n = 1; n <= 8; n++) {
9727 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009728 GemmMicrokernelTester()
9729 .mr(6)
9730 .nr(8)
9731 .kr(1)
9732 .sr(1)
9733 .m(m)
9734 .n(n)
9735 .k(4)
9736 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009737 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009738 }
9739 }
9740 }
9741
Marat Dukhande06f492020-04-09 00:19:31 -07009742 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009743 TEST_REQUIRES_ARM_NEON;
9744 for (uint32_t m = 1; m <= 6; m++) {
9745 GemmMicrokernelTester()
9746 .mr(6)
9747 .nr(8)
9748 .kr(1)
9749 .sr(1)
9750 .m(m)
9751 .n(8)
9752 .k(4)
9753 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009754 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009755 }
9756 }
9757
Marat Dukhande06f492020-04-09 00:19:31 -07009758 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009759 TEST_REQUIRES_ARM_NEON;
9760 for (uint32_t n = 1; n <= 8; n++) {
9761 GemmMicrokernelTester()
9762 .mr(6)
9763 .nr(8)
9764 .kr(1)
9765 .sr(1)
9766 .m(6)
9767 .n(n)
9768 .k(4)
9769 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009770 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009771 }
9772 }
9773
Marat Dukhande06f492020-04-09 00:19:31 -07009774 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009775 TEST_REQUIRES_ARM_NEON;
9776 for (size_t k = 1; k < 4; k++) {
9777 GemmMicrokernelTester()
9778 .mr(6)
9779 .nr(8)
9780 .kr(1)
9781 .sr(1)
9782 .m(6)
9783 .n(8)
9784 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009785 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009786 }
9787 }
9788
Marat Dukhande06f492020-04-09 00:19:31 -07009789 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009790 TEST_REQUIRES_ARM_NEON;
9791 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009792 for (uint32_t n = 1; n <= 8; n++) {
9793 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009794 GemmMicrokernelTester()
9795 .mr(6)
9796 .nr(8)
9797 .kr(1)
9798 .sr(1)
9799 .m(m)
9800 .n(n)
9801 .k(k)
9802 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009803 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009804 }
9805 }
9806 }
9807 }
9808
Marat Dukhande06f492020-04-09 00:19:31 -07009809 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009810 TEST_REQUIRES_ARM_NEON;
9811 for (size_t k = 5; k < 8; k++) {
9812 GemmMicrokernelTester()
9813 .mr(6)
9814 .nr(8)
9815 .kr(1)
9816 .sr(1)
9817 .m(6)
9818 .n(8)
9819 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009820 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009821 }
9822 }
9823
Marat Dukhande06f492020-04-09 00:19:31 -07009824 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009825 TEST_REQUIRES_ARM_NEON;
9826 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009827 for (uint32_t n = 1; n <= 8; n++) {
9828 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009829 GemmMicrokernelTester()
9830 .mr(6)
9831 .nr(8)
9832 .kr(1)
9833 .sr(1)
9834 .m(m)
9835 .n(n)
9836 .k(k)
9837 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009838 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009839 }
9840 }
9841 }
9842 }
9843
Marat Dukhande06f492020-04-09 00:19:31 -07009844 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009845 TEST_REQUIRES_ARM_NEON;
9846 for (size_t k = 8; k <= 40; k += 4) {
9847 GemmMicrokernelTester()
9848 .mr(6)
9849 .nr(8)
9850 .kr(1)
9851 .sr(1)
9852 .m(6)
9853 .n(8)
9854 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009855 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009856 }
9857 }
9858
Marat Dukhande06f492020-04-09 00:19:31 -07009859 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009860 TEST_REQUIRES_ARM_NEON;
9861 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009862 for (uint32_t n = 1; n <= 8; n++) {
9863 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009864 GemmMicrokernelTester()
9865 .mr(6)
9866 .nr(8)
9867 .kr(1)
9868 .sr(1)
9869 .m(m)
9870 .n(n)
9871 .k(k)
9872 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009873 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009874 }
9875 }
9876 }
9877 }
9878
Marat Dukhande06f492020-04-09 00:19:31 -07009879 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009880 TEST_REQUIRES_ARM_NEON;
9881 for (uint32_t n = 9; n < 16; n++) {
9882 for (size_t k = 1; k <= 20; k += 5) {
9883 GemmMicrokernelTester()
9884 .mr(6)
9885 .nr(8)
9886 .kr(1)
9887 .sr(1)
9888 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009889 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009890 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009891 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009892 }
9893 }
9894 }
9895
Marat Dukhande06f492020-04-09 00:19:31 -07009896 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009897 TEST_REQUIRES_ARM_NEON;
9898 for (uint32_t n = 9; n < 16; n++) {
9899 for (size_t k = 1; k <= 20; k += 5) {
9900 GemmMicrokernelTester()
9901 .mr(6)
9902 .nr(8)
9903 .kr(1)
9904 .sr(1)
9905 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009906 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009907 .k(k)
9908 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009909 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009910 }
9911 }
9912 }
9913
Marat Dukhande06f492020-04-09 00:19:31 -07009914 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009915 TEST_REQUIRES_ARM_NEON;
9916 for (uint32_t n = 9; n < 16; n++) {
9917 for (size_t k = 1; k <= 20; k += 5) {
9918 for (uint32_t m = 1; m <= 6; m++) {
9919 GemmMicrokernelTester()
9920 .mr(6)
9921 .nr(8)
9922 .kr(1)
9923 .sr(1)
9924 .m(m)
9925 .n(n)
9926 .k(k)
9927 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009928 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009929 }
9930 }
9931 }
9932 }
9933
Marat Dukhande06f492020-04-09 00:19:31 -07009934 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009935 TEST_REQUIRES_ARM_NEON;
9936 for (uint32_t n = 16; n <= 24; n += 8) {
9937 for (size_t k = 1; k <= 20; k += 5) {
9938 GemmMicrokernelTester()
9939 .mr(6)
9940 .nr(8)
9941 .kr(1)
9942 .sr(1)
9943 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009944 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -07009945 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009946 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009947 }
9948 }
9949 }
9950
Marat Dukhande06f492020-04-09 00:19:31 -07009951 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009952 TEST_REQUIRES_ARM_NEON;
9953 for (uint32_t n = 16; n <= 24; n += 8) {
9954 for (size_t k = 1; k <= 20; k += 5) {
9955 GemmMicrokernelTester()
9956 .mr(6)
9957 .nr(8)
9958 .kr(1)
9959 .sr(1)
9960 .m(6)
9961 .n(n)
9962 .k(k)
9963 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009964 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009965 }
9966 }
9967 }
9968
Marat Dukhande06f492020-04-09 00:19:31 -07009969 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009970 TEST_REQUIRES_ARM_NEON;
9971 for (uint32_t n = 16; n <= 24; n += 8) {
9972 for (size_t k = 1; k <= 20; k += 5) {
9973 for (uint32_t m = 1; m <= 6; m++) {
9974 GemmMicrokernelTester()
9975 .mr(6)
9976 .nr(8)
9977 .kr(1)
9978 .sr(1)
9979 .m(m)
9980 .n(n)
9981 .k(k)
9982 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -07009983 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -07009984 }
9985 }
9986 }
9987 }
9988
Marat Dukhande06f492020-04-09 00:19:31 -07009989 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -07009990 TEST_REQUIRES_ARM_NEON;
9991 for (size_t k = 1; k <= 20; k += 5) {
9992 GemmMicrokernelTester()
9993 .mr(6)
9994 .nr(8)
9995 .kr(1)
9996 .sr(1)
9997 .m(6)
9998 .n(8)
9999 .k(k)
10000 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010001 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010002 }
10003 }
10004
Marat Dukhande06f492020-04-09 00:19:31 -070010005 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010006 TEST_REQUIRES_ARM_NEON;
10007 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010008 for (uint32_t n = 1; n <= 8; n++) {
10009 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010010 GemmMicrokernelTester()
10011 .mr(6)
10012 .nr(8)
10013 .kr(1)
10014 .sr(1)
10015 .m(m)
10016 .n(n)
10017 .k(k)
10018 .ks(3)
10019 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010020 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010021 }
10022 }
10023 }
10024 }
10025
Marat Dukhande06f492020-04-09 00:19:31 -070010026 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010027 TEST_REQUIRES_ARM_NEON;
10028 for (uint32_t n = 9; n < 16; n++) {
10029 for (size_t k = 1; k <= 20; k += 5) {
10030 GemmMicrokernelTester()
10031 .mr(6)
10032 .nr(8)
10033 .kr(1)
10034 .sr(1)
10035 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010036 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010037 .k(k)
10038 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010039 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010040 }
10041 }
10042 }
10043
Marat Dukhande06f492020-04-09 00:19:31 -070010044 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010045 TEST_REQUIRES_ARM_NEON;
10046 for (uint32_t n = 16; n <= 24; n += 8) {
10047 for (size_t k = 1; k <= 20; k += 5) {
10048 GemmMicrokernelTester()
10049 .mr(6)
10050 .nr(8)
10051 .kr(1)
10052 .sr(1)
10053 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010054 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010055 .k(k)
10056 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010057 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010058 }
10059 }
10060 }
10061
Marat Dukhande06f492020-04-09 00:19:31 -070010062 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010063 TEST_REQUIRES_ARM_NEON;
10064 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010065 for (uint32_t n = 1; n <= 8; n++) {
10066 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010067 GemmMicrokernelTester()
10068 .mr(6)
10069 .nr(8)
10070 .kr(1)
10071 .sr(1)
10072 .m(m)
10073 .n(n)
10074 .k(k)
10075 .cm_stride(11)
10076 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010077 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010078 }
10079 }
10080 }
10081 }
10082
Marat Dukhande06f492020-04-09 00:19:31 -070010083 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010084 TEST_REQUIRES_ARM_NEON;
10085 for (size_t k = 1; k <= 20; k += 5) {
10086 GemmMicrokernelTester()
10087 .mr(6)
10088 .nr(8)
10089 .kr(1)
10090 .sr(1)
10091 .m(6)
10092 .n(8)
10093 .k(k)
10094 .ks(3)
10095 .a_offset(127)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010096 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010097 }
10098 }
10099
Marat Dukhande06f492020-04-09 00:19:31 -070010100 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010101 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010102 for (size_t k = 1; k <= 20; k += 5) {
10103 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010104 GemmMicrokernelTester()
10105 .mr(6)
10106 .nr(8)
10107 .kr(1)
10108 .sr(1)
10109 .m(6)
10110 .n(8)
10111 .k(k)
10112 .ks(3)
10113 .a_offset(127)
10114 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010115 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010116 }
10117 }
10118 }
10119
Marat Dukhande06f492020-04-09 00:19:31 -070010120 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010121 TEST_REQUIRES_ARM_NEON;
10122 GemmMicrokernelTester()
10123 .mr(6)
10124 .nr(8)
10125 .kr(1)
10126 .sr(1)
10127 .m(6)
10128 .n(8)
10129 .k(4)
10130 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010131 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010132 }
10133
Marat Dukhande06f492020-04-09 00:19:31 -070010134 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010135 TEST_REQUIRES_ARM_NEON;
10136 GemmMicrokernelTester()
10137 .mr(6)
10138 .nr(8)
10139 .kr(1)
10140 .sr(1)
10141 .m(6)
10142 .n(8)
10143 .k(4)
10144 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010145 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010146 }
10147
Marat Dukhande06f492020-04-09 00:19:31 -070010148 TEST(F32_IGEMM_MINMAX_6X8__NEON_DUP_LD128, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010149 TEST_REQUIRES_ARM_NEON;
10150 GemmMicrokernelTester()
10151 .mr(6)
10152 .nr(8)
10153 .kr(1)
10154 .sr(1)
10155 .m(6)
10156 .n(8)
10157 .k(4)
10158 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010159 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010160 }
10161#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10162
10163
10164#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070010165 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010166 TEST_REQUIRES_ARM_NEON_FMA;
10167 GemmMicrokernelTester()
10168 .mr(1)
10169 .nr(8)
10170 .kr(1)
10171 .sr(1)
10172 .m(1)
10173 .n(8)
10174 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010175 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010176 }
10177
Marat Dukhande06f492020-04-09 00:19:31 -070010178 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010179 TEST_REQUIRES_ARM_NEON_FMA;
10180 GemmMicrokernelTester()
10181 .mr(1)
10182 .nr(8)
10183 .kr(1)
10184 .sr(1)
10185 .m(1)
10186 .n(8)
10187 .k(2)
10188 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010189 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010190 }
10191
Marat Dukhande06f492020-04-09 00:19:31 -070010192 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010193 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010194 for (uint32_t n = 1; n <= 8; n++) {
10195 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010196 GemmMicrokernelTester()
10197 .mr(1)
10198 .nr(8)
10199 .kr(1)
10200 .sr(1)
10201 .m(m)
10202 .n(n)
10203 .k(2)
10204 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010205 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010206 }
10207 }
10208 }
10209
Marat Dukhande06f492020-04-09 00:19:31 -070010210 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010211 TEST_REQUIRES_ARM_NEON_FMA;
10212 for (uint32_t m = 1; m <= 1; m++) {
10213 GemmMicrokernelTester()
10214 .mr(1)
10215 .nr(8)
10216 .kr(1)
10217 .sr(1)
10218 .m(m)
10219 .n(8)
10220 .k(2)
10221 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010222 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010223 }
10224 }
10225
Marat Dukhande06f492020-04-09 00:19:31 -070010226 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010227 TEST_REQUIRES_ARM_NEON_FMA;
10228 for (uint32_t n = 1; n <= 8; n++) {
10229 GemmMicrokernelTester()
10230 .mr(1)
10231 .nr(8)
10232 .kr(1)
10233 .sr(1)
10234 .m(1)
10235 .n(n)
10236 .k(2)
10237 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010238 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010239 }
10240 }
10241
Marat Dukhande06f492020-04-09 00:19:31 -070010242 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010243 TEST_REQUIRES_ARM_NEON_FMA;
10244 for (size_t k = 1; k < 2; k++) {
10245 GemmMicrokernelTester()
10246 .mr(1)
10247 .nr(8)
10248 .kr(1)
10249 .sr(1)
10250 .m(1)
10251 .n(8)
10252 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010253 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010254 }
10255 }
10256
Marat Dukhande06f492020-04-09 00:19:31 -070010257 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010258 TEST_REQUIRES_ARM_NEON_FMA;
10259 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010260 for (uint32_t n = 1; n <= 8; n++) {
10261 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010262 GemmMicrokernelTester()
10263 .mr(1)
10264 .nr(8)
10265 .kr(1)
10266 .sr(1)
10267 .m(m)
10268 .n(n)
10269 .k(k)
10270 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010271 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010272 }
10273 }
10274 }
10275 }
10276
Marat Dukhande06f492020-04-09 00:19:31 -070010277 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010278 TEST_REQUIRES_ARM_NEON_FMA;
10279 for (size_t k = 3; k < 4; k++) {
10280 GemmMicrokernelTester()
10281 .mr(1)
10282 .nr(8)
10283 .kr(1)
10284 .sr(1)
10285 .m(1)
10286 .n(8)
10287 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010288 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010289 }
10290 }
10291
Marat Dukhande06f492020-04-09 00:19:31 -070010292 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010293 TEST_REQUIRES_ARM_NEON_FMA;
10294 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010295 for (uint32_t n = 1; n <= 8; n++) {
10296 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010297 GemmMicrokernelTester()
10298 .mr(1)
10299 .nr(8)
10300 .kr(1)
10301 .sr(1)
10302 .m(m)
10303 .n(n)
10304 .k(k)
10305 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010306 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010307 }
10308 }
10309 }
10310 }
10311
Marat Dukhande06f492020-04-09 00:19:31 -070010312 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010313 TEST_REQUIRES_ARM_NEON_FMA;
10314 for (size_t k = 4; k <= 20; k += 2) {
10315 GemmMicrokernelTester()
10316 .mr(1)
10317 .nr(8)
10318 .kr(1)
10319 .sr(1)
10320 .m(1)
10321 .n(8)
10322 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010323 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010324 }
10325 }
10326
Marat Dukhande06f492020-04-09 00:19:31 -070010327 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010328 TEST_REQUIRES_ARM_NEON_FMA;
10329 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010330 for (uint32_t n = 1; n <= 8; n++) {
10331 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010332 GemmMicrokernelTester()
10333 .mr(1)
10334 .nr(8)
10335 .kr(1)
10336 .sr(1)
10337 .m(m)
10338 .n(n)
10339 .k(k)
10340 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010341 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010342 }
10343 }
10344 }
10345 }
10346
Marat Dukhande06f492020-04-09 00:19:31 -070010347 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010348 TEST_REQUIRES_ARM_NEON_FMA;
10349 for (uint32_t n = 9; n < 16; n++) {
10350 for (size_t k = 1; k <= 10; k += 3) {
10351 GemmMicrokernelTester()
10352 .mr(1)
10353 .nr(8)
10354 .kr(1)
10355 .sr(1)
10356 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010357 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010358 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010359 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010360 }
10361 }
10362 }
10363
Marat Dukhande06f492020-04-09 00:19:31 -070010364 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010365 TEST_REQUIRES_ARM_NEON_FMA;
10366 for (uint32_t n = 9; n < 16; n++) {
10367 for (size_t k = 1; k <= 10; k += 3) {
10368 GemmMicrokernelTester()
10369 .mr(1)
10370 .nr(8)
10371 .kr(1)
10372 .sr(1)
10373 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010374 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010375 .k(k)
10376 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010377 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010378 }
10379 }
10380 }
10381
Marat Dukhande06f492020-04-09 00:19:31 -070010382 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010383 TEST_REQUIRES_ARM_NEON_FMA;
10384 for (uint32_t n = 9; n < 16; n++) {
10385 for (size_t k = 1; k <= 10; k += 3) {
10386 for (uint32_t m = 1; m <= 1; m++) {
10387 GemmMicrokernelTester()
10388 .mr(1)
10389 .nr(8)
10390 .kr(1)
10391 .sr(1)
10392 .m(m)
10393 .n(n)
10394 .k(k)
10395 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010396 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010397 }
10398 }
10399 }
10400 }
10401
Marat Dukhande06f492020-04-09 00:19:31 -070010402 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010403 TEST_REQUIRES_ARM_NEON_FMA;
10404 for (uint32_t n = 16; n <= 24; n += 8) {
10405 for (size_t k = 1; k <= 10; k += 3) {
10406 GemmMicrokernelTester()
10407 .mr(1)
10408 .nr(8)
10409 .kr(1)
10410 .sr(1)
10411 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010412 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010413 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010414 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010415 }
10416 }
10417 }
10418
Marat Dukhande06f492020-04-09 00:19:31 -070010419 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010420 TEST_REQUIRES_ARM_NEON_FMA;
10421 for (uint32_t n = 16; n <= 24; n += 8) {
10422 for (size_t k = 1; k <= 10; k += 3) {
10423 GemmMicrokernelTester()
10424 .mr(1)
10425 .nr(8)
10426 .kr(1)
10427 .sr(1)
10428 .m(1)
10429 .n(n)
10430 .k(k)
10431 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010432 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010433 }
10434 }
10435 }
10436
Marat Dukhande06f492020-04-09 00:19:31 -070010437 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010438 TEST_REQUIRES_ARM_NEON_FMA;
10439 for (uint32_t n = 16; n <= 24; n += 8) {
10440 for (size_t k = 1; k <= 10; k += 3) {
10441 for (uint32_t m = 1; m <= 1; m++) {
10442 GemmMicrokernelTester()
10443 .mr(1)
10444 .nr(8)
10445 .kr(1)
10446 .sr(1)
10447 .m(m)
10448 .n(n)
10449 .k(k)
10450 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010451 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010452 }
10453 }
10454 }
10455 }
10456
Marat Dukhande06f492020-04-09 00:19:31 -070010457 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010458 TEST_REQUIRES_ARM_NEON_FMA;
10459 for (size_t k = 1; k <= 10; k += 3) {
10460 GemmMicrokernelTester()
10461 .mr(1)
10462 .nr(8)
10463 .kr(1)
10464 .sr(1)
10465 .m(1)
10466 .n(8)
10467 .k(k)
10468 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010469 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010470 }
10471 }
10472
Marat Dukhande06f492020-04-09 00:19:31 -070010473 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010474 TEST_REQUIRES_ARM_NEON_FMA;
10475 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010476 for (uint32_t n = 1; n <= 8; n++) {
10477 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010478 GemmMicrokernelTester()
10479 .mr(1)
10480 .nr(8)
10481 .kr(1)
10482 .sr(1)
10483 .m(m)
10484 .n(n)
10485 .k(k)
10486 .ks(3)
10487 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010488 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010489 }
10490 }
10491 }
10492 }
10493
Marat Dukhande06f492020-04-09 00:19:31 -070010494 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010495 TEST_REQUIRES_ARM_NEON_FMA;
10496 for (uint32_t n = 9; n < 16; n++) {
10497 for (size_t k = 1; k <= 10; k += 3) {
10498 GemmMicrokernelTester()
10499 .mr(1)
10500 .nr(8)
10501 .kr(1)
10502 .sr(1)
10503 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010504 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010505 .k(k)
10506 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010507 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010508 }
10509 }
10510 }
10511
Marat Dukhande06f492020-04-09 00:19:31 -070010512 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010513 TEST_REQUIRES_ARM_NEON_FMA;
10514 for (uint32_t n = 16; n <= 24; n += 8) {
10515 for (size_t k = 1; k <= 10; k += 3) {
10516 GemmMicrokernelTester()
10517 .mr(1)
10518 .nr(8)
10519 .kr(1)
10520 .sr(1)
10521 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010522 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010523 .k(k)
10524 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010525 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010526 }
10527 }
10528 }
10529
Marat Dukhande06f492020-04-09 00:19:31 -070010530 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010531 TEST_REQUIRES_ARM_NEON_FMA;
10532 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010533 for (uint32_t n = 1; n <= 8; n++) {
10534 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010535 GemmMicrokernelTester()
10536 .mr(1)
10537 .nr(8)
10538 .kr(1)
10539 .sr(1)
10540 .m(m)
10541 .n(n)
10542 .k(k)
10543 .cm_stride(11)
10544 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010545 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010546 }
10547 }
10548 }
10549 }
10550
Marat Dukhande06f492020-04-09 00:19:31 -070010551 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010552 TEST_REQUIRES_ARM_NEON_FMA;
10553 for (size_t k = 1; k <= 10; k += 3) {
10554 GemmMicrokernelTester()
10555 .mr(1)
10556 .nr(8)
10557 .kr(1)
10558 .sr(1)
10559 .m(1)
10560 .n(8)
10561 .k(k)
10562 .ks(3)
10563 .a_offset(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010564 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010565 }
10566 }
10567
Marat Dukhande06f492020-04-09 00:19:31 -070010568 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010569 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010570 for (size_t k = 1; k <= 10; k += 3) {
10571 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010572 GemmMicrokernelTester()
10573 .mr(1)
10574 .nr(8)
10575 .kr(1)
10576 .sr(1)
10577 .m(1)
10578 .n(8)
10579 .k(k)
10580 .ks(3)
10581 .a_offset(13)
10582 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010583 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010584 }
10585 }
10586 }
10587
Marat Dukhande06f492020-04-09 00:19:31 -070010588 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010589 TEST_REQUIRES_ARM_NEON_FMA;
10590 GemmMicrokernelTester()
10591 .mr(1)
10592 .nr(8)
10593 .kr(1)
10594 .sr(1)
10595 .m(1)
10596 .n(8)
10597 .k(2)
10598 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010599 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010600 }
10601
Marat Dukhande06f492020-04-09 00:19:31 -070010602 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010603 TEST_REQUIRES_ARM_NEON_FMA;
10604 GemmMicrokernelTester()
10605 .mr(1)
10606 .nr(8)
10607 .kr(1)
10608 .sr(1)
10609 .m(1)
10610 .n(8)
10611 .k(2)
10612 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010613 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010614 }
10615
Marat Dukhande06f492020-04-09 00:19:31 -070010616 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_DUP_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010617 TEST_REQUIRES_ARM_NEON_FMA;
10618 GemmMicrokernelTester()
10619 .mr(1)
10620 .nr(8)
10621 .kr(1)
10622 .sr(1)
10623 .m(1)
10624 .n(8)
10625 .k(2)
10626 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010627 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010628 }
10629#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10630
10631
10632#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070010633 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010634 TEST_REQUIRES_ARM_NEON_FMA;
10635 GemmMicrokernelTester()
10636 .mr(6)
10637 .nr(8)
10638 .kr(1)
10639 .sr(1)
10640 .m(6)
10641 .n(8)
10642 .k(2)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010643 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010644 }
10645
Marat Dukhande06f492020-04-09 00:19:31 -070010646 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010647 TEST_REQUIRES_ARM_NEON_FMA;
10648 GemmMicrokernelTester()
10649 .mr(6)
10650 .nr(8)
10651 .kr(1)
10652 .sr(1)
10653 .m(6)
10654 .n(8)
10655 .k(2)
10656 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010657 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010658 }
10659
Marat Dukhande06f492020-04-09 00:19:31 -070010660 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010661 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010662 for (uint32_t n = 1; n <= 8; n++) {
10663 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010664 GemmMicrokernelTester()
10665 .mr(6)
10666 .nr(8)
10667 .kr(1)
10668 .sr(1)
10669 .m(m)
10670 .n(n)
10671 .k(2)
10672 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010673 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010674 }
10675 }
10676 }
10677
Marat Dukhande06f492020-04-09 00:19:31 -070010678 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010679 TEST_REQUIRES_ARM_NEON_FMA;
10680 for (uint32_t m = 1; m <= 6; m++) {
10681 GemmMicrokernelTester()
10682 .mr(6)
10683 .nr(8)
10684 .kr(1)
10685 .sr(1)
10686 .m(m)
10687 .n(8)
10688 .k(2)
10689 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010690 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010691 }
10692 }
10693
Marat Dukhande06f492020-04-09 00:19:31 -070010694 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010695 TEST_REQUIRES_ARM_NEON_FMA;
10696 for (uint32_t n = 1; n <= 8; n++) {
10697 GemmMicrokernelTester()
10698 .mr(6)
10699 .nr(8)
10700 .kr(1)
10701 .sr(1)
10702 .m(6)
10703 .n(n)
10704 .k(2)
10705 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010706 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010707 }
10708 }
10709
Marat Dukhande06f492020-04-09 00:19:31 -070010710 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_lt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010711 TEST_REQUIRES_ARM_NEON_FMA;
10712 for (size_t k = 1; k < 2; k++) {
10713 GemmMicrokernelTester()
10714 .mr(6)
10715 .nr(8)
10716 .kr(1)
10717 .sr(1)
10718 .m(6)
10719 .n(8)
10720 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010721 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010722 }
10723 }
10724
Marat Dukhande06f492020-04-09 00:19:31 -070010725 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010726 TEST_REQUIRES_ARM_NEON_FMA;
10727 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010728 for (uint32_t n = 1; n <= 8; n++) {
10729 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010730 GemmMicrokernelTester()
10731 .mr(6)
10732 .nr(8)
10733 .kr(1)
10734 .sr(1)
10735 .m(m)
10736 .n(n)
10737 .k(k)
10738 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010739 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010740 }
10741 }
10742 }
10743 }
10744
Marat Dukhande06f492020-04-09 00:19:31 -070010745 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_gt_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010746 TEST_REQUIRES_ARM_NEON_FMA;
10747 for (size_t k = 3; k < 4; k++) {
10748 GemmMicrokernelTester()
10749 .mr(6)
10750 .nr(8)
10751 .kr(1)
10752 .sr(1)
10753 .m(6)
10754 .n(8)
10755 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010756 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010757 }
10758 }
10759
Marat Dukhande06f492020-04-09 00:19:31 -070010760 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010761 TEST_REQUIRES_ARM_NEON_FMA;
10762 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010763 for (uint32_t n = 1; n <= 8; n++) {
10764 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010765 GemmMicrokernelTester()
10766 .mr(6)
10767 .nr(8)
10768 .kr(1)
10769 .sr(1)
10770 .m(m)
10771 .n(n)
10772 .k(k)
10773 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010774 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010775 }
10776 }
10777 }
10778 }
10779
Marat Dukhande06f492020-04-09 00:19:31 -070010780 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_div_2) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010781 TEST_REQUIRES_ARM_NEON_FMA;
10782 for (size_t k = 4; k <= 20; k += 2) {
10783 GemmMicrokernelTester()
10784 .mr(6)
10785 .nr(8)
10786 .kr(1)
10787 .sr(1)
10788 .m(6)
10789 .n(8)
10790 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010791 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010792 }
10793 }
10794
Marat Dukhande06f492020-04-09 00:19:31 -070010795 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010796 TEST_REQUIRES_ARM_NEON_FMA;
10797 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010798 for (uint32_t n = 1; n <= 8; n++) {
10799 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010800 GemmMicrokernelTester()
10801 .mr(6)
10802 .nr(8)
10803 .kr(1)
10804 .sr(1)
10805 .m(m)
10806 .n(n)
10807 .k(k)
10808 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010809 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010810 }
10811 }
10812 }
10813 }
10814
Marat Dukhande06f492020-04-09 00:19:31 -070010815 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010816 TEST_REQUIRES_ARM_NEON_FMA;
10817 for (uint32_t n = 9; n < 16; n++) {
10818 for (size_t k = 1; k <= 10; k += 3) {
10819 GemmMicrokernelTester()
10820 .mr(6)
10821 .nr(8)
10822 .kr(1)
10823 .sr(1)
10824 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010825 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010826 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010827 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010828 }
10829 }
10830 }
10831
Marat Dukhande06f492020-04-09 00:19:31 -070010832 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010833 TEST_REQUIRES_ARM_NEON_FMA;
10834 for (uint32_t n = 9; n < 16; n++) {
10835 for (size_t k = 1; k <= 10; k += 3) {
10836 GemmMicrokernelTester()
10837 .mr(6)
10838 .nr(8)
10839 .kr(1)
10840 .sr(1)
10841 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010842 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010843 .k(k)
10844 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010845 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010846 }
10847 }
10848 }
10849
Marat Dukhande06f492020-04-09 00:19:31 -070010850 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010851 TEST_REQUIRES_ARM_NEON_FMA;
10852 for (uint32_t n = 9; n < 16; n++) {
10853 for (size_t k = 1; k <= 10; k += 3) {
10854 for (uint32_t m = 1; m <= 6; m++) {
10855 GemmMicrokernelTester()
10856 .mr(6)
10857 .nr(8)
10858 .kr(1)
10859 .sr(1)
10860 .m(m)
10861 .n(n)
10862 .k(k)
10863 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010864 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010865 }
10866 }
10867 }
10868 }
10869
Marat Dukhande06f492020-04-09 00:19:31 -070010870 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010871 TEST_REQUIRES_ARM_NEON_FMA;
10872 for (uint32_t n = 16; n <= 24; n += 8) {
10873 for (size_t k = 1; k <= 10; k += 3) {
10874 GemmMicrokernelTester()
10875 .mr(6)
10876 .nr(8)
10877 .kr(1)
10878 .sr(1)
10879 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010880 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010881 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010882 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010883 }
10884 }
10885 }
10886
Marat Dukhande06f492020-04-09 00:19:31 -070010887 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010888 TEST_REQUIRES_ARM_NEON_FMA;
10889 for (uint32_t n = 16; n <= 24; n += 8) {
10890 for (size_t k = 1; k <= 10; k += 3) {
10891 GemmMicrokernelTester()
10892 .mr(6)
10893 .nr(8)
10894 .kr(1)
10895 .sr(1)
10896 .m(6)
10897 .n(n)
10898 .k(k)
10899 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010900 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010901 }
10902 }
10903 }
10904
Marat Dukhande06f492020-04-09 00:19:31 -070010905 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010906 TEST_REQUIRES_ARM_NEON_FMA;
10907 for (uint32_t n = 16; n <= 24; n += 8) {
10908 for (size_t k = 1; k <= 10; k += 3) {
10909 for (uint32_t m = 1; m <= 6; m++) {
10910 GemmMicrokernelTester()
10911 .mr(6)
10912 .nr(8)
10913 .kr(1)
10914 .sr(1)
10915 .m(m)
10916 .n(n)
10917 .k(k)
10918 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010919 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010920 }
10921 }
10922 }
10923 }
10924
Marat Dukhande06f492020-04-09 00:19:31 -070010925 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010926 TEST_REQUIRES_ARM_NEON_FMA;
10927 for (size_t k = 1; k <= 10; k += 3) {
10928 GemmMicrokernelTester()
10929 .mr(6)
10930 .nr(8)
10931 .kr(1)
10932 .sr(1)
10933 .m(6)
10934 .n(8)
10935 .k(k)
10936 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010937 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010938 }
10939 }
10940
Marat Dukhande06f492020-04-09 00:19:31 -070010941 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010942 TEST_REQUIRES_ARM_NEON_FMA;
10943 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010944 for (uint32_t n = 1; n <= 8; n++) {
10945 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010946 GemmMicrokernelTester()
10947 .mr(6)
10948 .nr(8)
10949 .kr(1)
10950 .sr(1)
10951 .m(m)
10952 .n(n)
10953 .k(k)
10954 .ks(3)
10955 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010956 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010957 }
10958 }
10959 }
10960 }
10961
Marat Dukhande06f492020-04-09 00:19:31 -070010962 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010963 TEST_REQUIRES_ARM_NEON_FMA;
10964 for (uint32_t n = 9; n < 16; n++) {
10965 for (size_t k = 1; k <= 10; k += 3) {
10966 GemmMicrokernelTester()
10967 .mr(6)
10968 .nr(8)
10969 .kr(1)
10970 .sr(1)
10971 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010972 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010973 .k(k)
10974 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010975 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010976 }
10977 }
10978 }
10979
Marat Dukhande06f492020-04-09 00:19:31 -070010980 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010981 TEST_REQUIRES_ARM_NEON_FMA;
10982 for (uint32_t n = 16; n <= 24; n += 8) {
10983 for (size_t k = 1; k <= 10; k += 3) {
10984 GemmMicrokernelTester()
10985 .mr(6)
10986 .nr(8)
10987 .kr(1)
10988 .sr(1)
10989 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010990 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070010991 .k(k)
10992 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070010993 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070010994 }
10995 }
10996 }
10997
Marat Dukhande06f492020-04-09 00:19:31 -070010998 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070010999 TEST_REQUIRES_ARM_NEON_FMA;
11000 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011001 for (uint32_t n = 1; n <= 8; n++) {
11002 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011003 GemmMicrokernelTester()
11004 .mr(6)
11005 .nr(8)
11006 .kr(1)
11007 .sr(1)
11008 .m(m)
11009 .n(n)
11010 .k(k)
11011 .cm_stride(11)
11012 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011013 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011014 }
11015 }
11016 }
11017 }
11018
Marat Dukhande06f492020-04-09 00:19:31 -070011019 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011020 TEST_REQUIRES_ARM_NEON_FMA;
11021 for (size_t k = 1; k <= 10; k += 3) {
11022 GemmMicrokernelTester()
11023 .mr(6)
11024 .nr(8)
11025 .kr(1)
11026 .sr(1)
11027 .m(6)
11028 .n(8)
11029 .k(k)
11030 .ks(3)
11031 .a_offset(67)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011032 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011033 }
11034 }
11035
Marat Dukhande06f492020-04-09 00:19:31 -070011036 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011037 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011038 for (size_t k = 1; k <= 10; k += 3) {
11039 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011040 GemmMicrokernelTester()
11041 .mr(6)
11042 .nr(8)
11043 .kr(1)
11044 .sr(1)
11045 .m(6)
11046 .n(8)
11047 .k(k)
11048 .ks(3)
11049 .a_offset(67)
11050 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011051 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011052 }
11053 }
11054 }
11055
Marat Dukhande06f492020-04-09 00:19:31 -070011056 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011057 TEST_REQUIRES_ARM_NEON_FMA;
11058 GemmMicrokernelTester()
11059 .mr(6)
11060 .nr(8)
11061 .kr(1)
11062 .sr(1)
11063 .m(6)
11064 .n(8)
11065 .k(2)
11066 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011067 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011068 }
11069
Marat Dukhande06f492020-04-09 00:19:31 -070011070 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011071 TEST_REQUIRES_ARM_NEON_FMA;
11072 GemmMicrokernelTester()
11073 .mr(6)
11074 .nr(8)
11075 .kr(1)
11076 .sr(1)
11077 .m(6)
11078 .n(8)
11079 .k(2)
11080 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011081 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011082 }
11083
Marat Dukhande06f492020-04-09 00:19:31 -070011084 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD64, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011085 TEST_REQUIRES_ARM_NEON_FMA;
11086 GemmMicrokernelTester()
11087 .mr(6)
11088 .nr(8)
11089 .kr(1)
11090 .sr(1)
11091 .m(6)
11092 .n(8)
11093 .k(2)
11094 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011095 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011096 }
11097#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11098
11099
11100#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070011101 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011102 TEST_REQUIRES_ARM_NEON;
11103 GemmMicrokernelTester()
11104 .mr(6)
11105 .nr(8)
11106 .kr(1)
11107 .sr(4)
11108 .m(6)
11109 .n(8)
11110 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011111 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011112 }
11113
Marat Dukhande06f492020-04-09 00:19:31 -070011114 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011115 TEST_REQUIRES_ARM_NEON;
11116 GemmMicrokernelTester()
11117 .mr(6)
11118 .nr(8)
11119 .kr(1)
11120 .sr(4)
11121 .m(6)
11122 .n(8)
11123 .k(4)
11124 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011125 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011126 }
11127
Marat Dukhande06f492020-04-09 00:19:31 -070011128 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011129 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011130 for (uint32_t n = 1; n <= 8; n++) {
11131 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011132 GemmMicrokernelTester()
11133 .mr(6)
11134 .nr(8)
11135 .kr(1)
11136 .sr(4)
11137 .m(m)
11138 .n(n)
11139 .k(4)
11140 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011141 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011142 }
11143 }
11144 }
11145
Marat Dukhande06f492020-04-09 00:19:31 -070011146 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011147 TEST_REQUIRES_ARM_NEON;
11148 for (uint32_t m = 1; m <= 6; m++) {
11149 GemmMicrokernelTester()
11150 .mr(6)
11151 .nr(8)
11152 .kr(1)
11153 .sr(4)
11154 .m(m)
11155 .n(8)
11156 .k(4)
11157 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011158 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011159 }
11160 }
11161
Marat Dukhande06f492020-04-09 00:19:31 -070011162 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011163 TEST_REQUIRES_ARM_NEON;
11164 for (uint32_t n = 1; n <= 8; n++) {
11165 GemmMicrokernelTester()
11166 .mr(6)
11167 .nr(8)
11168 .kr(1)
11169 .sr(4)
11170 .m(6)
11171 .n(n)
11172 .k(4)
11173 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011174 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011175 }
11176 }
11177
Marat Dukhande06f492020-04-09 00:19:31 -070011178 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011179 TEST_REQUIRES_ARM_NEON;
11180 for (size_t k = 1; k < 4; k++) {
11181 GemmMicrokernelTester()
11182 .mr(6)
11183 .nr(8)
11184 .kr(1)
11185 .sr(4)
11186 .m(6)
11187 .n(8)
11188 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011189 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011190 }
11191 }
11192
Marat Dukhande06f492020-04-09 00:19:31 -070011193 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011194 TEST_REQUIRES_ARM_NEON;
11195 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011196 for (uint32_t n = 1; n <= 8; n++) {
11197 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011198 GemmMicrokernelTester()
11199 .mr(6)
11200 .nr(8)
11201 .kr(1)
11202 .sr(4)
11203 .m(m)
11204 .n(n)
11205 .k(k)
11206 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011207 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011208 }
11209 }
11210 }
11211 }
11212
Marat Dukhande06f492020-04-09 00:19:31 -070011213 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011214 TEST_REQUIRES_ARM_NEON;
11215 for (size_t k = 5; k < 8; k++) {
11216 GemmMicrokernelTester()
11217 .mr(6)
11218 .nr(8)
11219 .kr(1)
11220 .sr(4)
11221 .m(6)
11222 .n(8)
11223 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011224 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011225 }
11226 }
11227
Marat Dukhande06f492020-04-09 00:19:31 -070011228 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011229 TEST_REQUIRES_ARM_NEON;
11230 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011231 for (uint32_t n = 1; n <= 8; n++) {
11232 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011233 GemmMicrokernelTester()
11234 .mr(6)
11235 .nr(8)
11236 .kr(1)
11237 .sr(4)
11238 .m(m)
11239 .n(n)
11240 .k(k)
11241 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011242 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011243 }
11244 }
11245 }
11246 }
11247
Marat Dukhande06f492020-04-09 00:19:31 -070011248 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011249 TEST_REQUIRES_ARM_NEON;
11250 for (size_t k = 8; k <= 40; k += 4) {
11251 GemmMicrokernelTester()
11252 .mr(6)
11253 .nr(8)
11254 .kr(1)
11255 .sr(4)
11256 .m(6)
11257 .n(8)
11258 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011259 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011260 }
11261 }
11262
Marat Dukhande06f492020-04-09 00:19:31 -070011263 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011264 TEST_REQUIRES_ARM_NEON;
11265 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011266 for (uint32_t n = 1; n <= 8; n++) {
11267 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011268 GemmMicrokernelTester()
11269 .mr(6)
11270 .nr(8)
11271 .kr(1)
11272 .sr(4)
11273 .m(m)
11274 .n(n)
11275 .k(k)
11276 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011277 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011278 }
11279 }
11280 }
11281 }
11282
Marat Dukhande06f492020-04-09 00:19:31 -070011283 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011284 TEST_REQUIRES_ARM_NEON;
11285 for (uint32_t n = 9; n < 16; n++) {
11286 for (size_t k = 1; k <= 20; k += 5) {
11287 GemmMicrokernelTester()
11288 .mr(6)
11289 .nr(8)
11290 .kr(1)
11291 .sr(4)
11292 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011293 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011294 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011295 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011296 }
11297 }
11298 }
11299
Marat Dukhande06f492020-04-09 00:19:31 -070011300 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011301 TEST_REQUIRES_ARM_NEON;
11302 for (uint32_t n = 9; n < 16; n++) {
11303 for (size_t k = 1; k <= 20; k += 5) {
11304 GemmMicrokernelTester()
11305 .mr(6)
11306 .nr(8)
11307 .kr(1)
11308 .sr(4)
11309 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011310 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011311 .k(k)
11312 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011313 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011314 }
11315 }
11316 }
11317
Marat Dukhande06f492020-04-09 00:19:31 -070011318 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011319 TEST_REQUIRES_ARM_NEON;
11320 for (uint32_t n = 9; n < 16; n++) {
11321 for (size_t k = 1; k <= 20; k += 5) {
11322 for (uint32_t m = 1; m <= 6; m++) {
11323 GemmMicrokernelTester()
11324 .mr(6)
11325 .nr(8)
11326 .kr(1)
11327 .sr(4)
11328 .m(m)
11329 .n(n)
11330 .k(k)
11331 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011332 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011333 }
11334 }
11335 }
11336 }
11337
Marat Dukhande06f492020-04-09 00:19:31 -070011338 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011339 TEST_REQUIRES_ARM_NEON;
11340 for (uint32_t n = 16; n <= 24; n += 8) {
11341 for (size_t k = 1; k <= 20; k += 5) {
11342 GemmMicrokernelTester()
11343 .mr(6)
11344 .nr(8)
11345 .kr(1)
11346 .sr(4)
11347 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011348 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011349 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011350 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011351 }
11352 }
11353 }
11354
Marat Dukhande06f492020-04-09 00:19:31 -070011355 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011356 TEST_REQUIRES_ARM_NEON;
11357 for (uint32_t n = 16; n <= 24; n += 8) {
11358 for (size_t k = 1; k <= 20; k += 5) {
11359 GemmMicrokernelTester()
11360 .mr(6)
11361 .nr(8)
11362 .kr(1)
11363 .sr(4)
11364 .m(6)
11365 .n(n)
11366 .k(k)
11367 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011368 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011369 }
11370 }
11371 }
11372
Marat Dukhande06f492020-04-09 00:19:31 -070011373 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011374 TEST_REQUIRES_ARM_NEON;
11375 for (uint32_t n = 16; n <= 24; n += 8) {
11376 for (size_t k = 1; k <= 20; k += 5) {
11377 for (uint32_t m = 1; m <= 6; m++) {
11378 GemmMicrokernelTester()
11379 .mr(6)
11380 .nr(8)
11381 .kr(1)
11382 .sr(4)
11383 .m(m)
11384 .n(n)
11385 .k(k)
11386 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011387 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011388 }
11389 }
11390 }
11391 }
11392
Marat Dukhande06f492020-04-09 00:19:31 -070011393 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011394 TEST_REQUIRES_ARM_NEON;
11395 for (size_t k = 1; k <= 20; k += 5) {
11396 GemmMicrokernelTester()
11397 .mr(6)
11398 .nr(8)
11399 .kr(1)
11400 .sr(4)
11401 .m(6)
11402 .n(8)
11403 .k(k)
11404 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011405 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011406 }
11407 }
11408
Marat Dukhande06f492020-04-09 00:19:31 -070011409 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011410 TEST_REQUIRES_ARM_NEON;
11411 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011412 for (uint32_t n = 1; n <= 8; n++) {
11413 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011414 GemmMicrokernelTester()
11415 .mr(6)
11416 .nr(8)
11417 .kr(1)
11418 .sr(4)
11419 .m(m)
11420 .n(n)
11421 .k(k)
11422 .ks(3)
11423 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011424 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011425 }
11426 }
11427 }
11428 }
11429
Marat Dukhande06f492020-04-09 00:19:31 -070011430 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011431 TEST_REQUIRES_ARM_NEON;
11432 for (uint32_t n = 9; n < 16; n++) {
11433 for (size_t k = 1; k <= 20; k += 5) {
11434 GemmMicrokernelTester()
11435 .mr(6)
11436 .nr(8)
11437 .kr(1)
11438 .sr(4)
11439 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011440 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011441 .k(k)
11442 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011443 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011444 }
11445 }
11446 }
11447
Marat Dukhande06f492020-04-09 00:19:31 -070011448 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011449 TEST_REQUIRES_ARM_NEON;
11450 for (uint32_t n = 16; n <= 24; n += 8) {
11451 for (size_t k = 1; k <= 20; k += 5) {
11452 GemmMicrokernelTester()
11453 .mr(6)
11454 .nr(8)
11455 .kr(1)
11456 .sr(4)
11457 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011458 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011459 .k(k)
11460 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011461 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011462 }
11463 }
11464 }
11465
Marat Dukhande06f492020-04-09 00:19:31 -070011466 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011467 TEST_REQUIRES_ARM_NEON;
11468 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011469 for (uint32_t n = 1; n <= 8; n++) {
11470 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011471 GemmMicrokernelTester()
11472 .mr(6)
11473 .nr(8)
11474 .kr(1)
11475 .sr(4)
11476 .m(m)
11477 .n(n)
11478 .k(k)
11479 .cm_stride(11)
11480 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011481 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011482 }
11483 }
11484 }
11485 }
11486
Marat Dukhande06f492020-04-09 00:19:31 -070011487 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011488 TEST_REQUIRES_ARM_NEON;
11489 for (size_t k = 1; k <= 20; k += 5) {
11490 GemmMicrokernelTester()
11491 .mr(6)
11492 .nr(8)
11493 .kr(1)
11494 .sr(4)
11495 .m(6)
11496 .n(8)
11497 .k(k)
11498 .ks(3)
11499 .a_offset(127)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011500 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011501 }
11502 }
11503
Marat Dukhande06f492020-04-09 00:19:31 -070011504 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011505 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011506 for (size_t k = 1; k <= 20; k += 5) {
11507 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011508 GemmMicrokernelTester()
11509 .mr(6)
11510 .nr(8)
11511 .kr(1)
11512 .sr(4)
11513 .m(6)
11514 .n(8)
11515 .k(k)
11516 .ks(3)
11517 .a_offset(127)
11518 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011519 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011520 }
11521 }
11522 }
11523
Marat Dukhande06f492020-04-09 00:19:31 -070011524 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011525 TEST_REQUIRES_ARM_NEON;
11526 GemmMicrokernelTester()
11527 .mr(6)
11528 .nr(8)
11529 .kr(1)
11530 .sr(4)
11531 .m(6)
11532 .n(8)
11533 .k(4)
11534 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011535 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011536 }
11537
Marat Dukhande06f492020-04-09 00:19:31 -070011538 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011539 TEST_REQUIRES_ARM_NEON;
11540 GemmMicrokernelTester()
11541 .mr(6)
11542 .nr(8)
11543 .kr(1)
11544 .sr(4)
11545 .m(6)
11546 .n(8)
11547 .k(4)
11548 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011549 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011550 }
11551
Marat Dukhande06f492020-04-09 00:19:31 -070011552 TEST(F32_IGEMM_MINMAX_6X8S4__NEON, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011553 TEST_REQUIRES_ARM_NEON;
11554 GemmMicrokernelTester()
11555 .mr(6)
11556 .nr(8)
11557 .kr(1)
11558 .sr(4)
11559 .m(6)
11560 .n(8)
11561 .k(4)
11562 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011563 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011564 }
11565#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11566
11567
11568#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070011569 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011570 TEST_REQUIRES_ARM_NEON;
11571 GemmMicrokernelTester()
11572 .mr(8)
11573 .nr(8)
11574 .kr(1)
11575 .sr(4)
11576 .m(8)
11577 .n(8)
11578 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011579 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011580 }
11581
Marat Dukhande06f492020-04-09 00:19:31 -070011582 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011583 TEST_REQUIRES_ARM_NEON;
11584 GemmMicrokernelTester()
11585 .mr(8)
11586 .nr(8)
11587 .kr(1)
11588 .sr(4)
11589 .m(8)
11590 .n(8)
11591 .k(4)
11592 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011593 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011594 }
11595
Marat Dukhande06f492020-04-09 00:19:31 -070011596 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011597 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011598 for (uint32_t n = 1; n <= 8; n++) {
11599 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011600 GemmMicrokernelTester()
11601 .mr(8)
11602 .nr(8)
11603 .kr(1)
11604 .sr(4)
11605 .m(m)
11606 .n(n)
11607 .k(4)
11608 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011609 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011610 }
11611 }
11612 }
11613
Marat Dukhande06f492020-04-09 00:19:31 -070011614 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011615 TEST_REQUIRES_ARM_NEON;
11616 for (uint32_t m = 1; m <= 8; m++) {
11617 GemmMicrokernelTester()
11618 .mr(8)
11619 .nr(8)
11620 .kr(1)
11621 .sr(4)
11622 .m(m)
11623 .n(8)
11624 .k(4)
11625 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011626 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011627 }
11628 }
11629
Marat Dukhande06f492020-04-09 00:19:31 -070011630 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011631 TEST_REQUIRES_ARM_NEON;
11632 for (uint32_t n = 1; n <= 8; n++) {
11633 GemmMicrokernelTester()
11634 .mr(8)
11635 .nr(8)
11636 .kr(1)
11637 .sr(4)
11638 .m(8)
11639 .n(n)
11640 .k(4)
11641 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011642 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011643 }
11644 }
11645
Marat Dukhande06f492020-04-09 00:19:31 -070011646 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011647 TEST_REQUIRES_ARM_NEON;
11648 for (size_t k = 1; k < 4; k++) {
11649 GemmMicrokernelTester()
11650 .mr(8)
11651 .nr(8)
11652 .kr(1)
11653 .sr(4)
11654 .m(8)
11655 .n(8)
11656 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011657 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011658 }
11659 }
11660
Marat Dukhande06f492020-04-09 00:19:31 -070011661 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011662 TEST_REQUIRES_ARM_NEON;
11663 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011664 for (uint32_t n = 1; n <= 8; n++) {
11665 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011666 GemmMicrokernelTester()
11667 .mr(8)
11668 .nr(8)
11669 .kr(1)
11670 .sr(4)
11671 .m(m)
11672 .n(n)
11673 .k(k)
11674 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011675 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011676 }
11677 }
11678 }
11679 }
11680
Marat Dukhande06f492020-04-09 00:19:31 -070011681 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011682 TEST_REQUIRES_ARM_NEON;
11683 for (size_t k = 5; k < 8; k++) {
11684 GemmMicrokernelTester()
11685 .mr(8)
11686 .nr(8)
11687 .kr(1)
11688 .sr(4)
11689 .m(8)
11690 .n(8)
11691 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011692 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011693 }
11694 }
11695
Marat Dukhande06f492020-04-09 00:19:31 -070011696 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011697 TEST_REQUIRES_ARM_NEON;
11698 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011699 for (uint32_t n = 1; n <= 8; n++) {
11700 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011701 GemmMicrokernelTester()
11702 .mr(8)
11703 .nr(8)
11704 .kr(1)
11705 .sr(4)
11706 .m(m)
11707 .n(n)
11708 .k(k)
11709 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011710 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011711 }
11712 }
11713 }
11714 }
11715
Marat Dukhande06f492020-04-09 00:19:31 -070011716 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011717 TEST_REQUIRES_ARM_NEON;
11718 for (size_t k = 8; k <= 40; k += 4) {
11719 GemmMicrokernelTester()
11720 .mr(8)
11721 .nr(8)
11722 .kr(1)
11723 .sr(4)
11724 .m(8)
11725 .n(8)
11726 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011727 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011728 }
11729 }
11730
Marat Dukhande06f492020-04-09 00:19:31 -070011731 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011732 TEST_REQUIRES_ARM_NEON;
11733 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011734 for (uint32_t n = 1; n <= 8; n++) {
11735 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011736 GemmMicrokernelTester()
11737 .mr(8)
11738 .nr(8)
11739 .kr(1)
11740 .sr(4)
11741 .m(m)
11742 .n(n)
11743 .k(k)
11744 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011745 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011746 }
11747 }
11748 }
11749 }
11750
Marat Dukhande06f492020-04-09 00:19:31 -070011751 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011752 TEST_REQUIRES_ARM_NEON;
11753 for (uint32_t n = 9; n < 16; n++) {
11754 for (size_t k = 1; k <= 20; k += 5) {
11755 GemmMicrokernelTester()
11756 .mr(8)
11757 .nr(8)
11758 .kr(1)
11759 .sr(4)
11760 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011761 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011762 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011763 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011764 }
11765 }
11766 }
11767
Marat Dukhande06f492020-04-09 00:19:31 -070011768 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011769 TEST_REQUIRES_ARM_NEON;
11770 for (uint32_t n = 9; n < 16; n++) {
11771 for (size_t k = 1; k <= 20; k += 5) {
11772 GemmMicrokernelTester()
11773 .mr(8)
11774 .nr(8)
11775 .kr(1)
11776 .sr(4)
11777 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011778 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011779 .k(k)
11780 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011781 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011782 }
11783 }
11784 }
11785
Marat Dukhande06f492020-04-09 00:19:31 -070011786 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011787 TEST_REQUIRES_ARM_NEON;
11788 for (uint32_t n = 9; n < 16; n++) {
11789 for (size_t k = 1; k <= 20; k += 5) {
11790 for (uint32_t m = 1; m <= 8; m++) {
11791 GemmMicrokernelTester()
11792 .mr(8)
11793 .nr(8)
11794 .kr(1)
11795 .sr(4)
11796 .m(m)
11797 .n(n)
11798 .k(k)
11799 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011800 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011801 }
11802 }
11803 }
11804 }
11805
Marat Dukhande06f492020-04-09 00:19:31 -070011806 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011807 TEST_REQUIRES_ARM_NEON;
11808 for (uint32_t n = 16; n <= 24; n += 8) {
11809 for (size_t k = 1; k <= 20; k += 5) {
11810 GemmMicrokernelTester()
11811 .mr(8)
11812 .nr(8)
11813 .kr(1)
11814 .sr(4)
11815 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011816 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011817 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011818 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011819 }
11820 }
11821 }
11822
Marat Dukhande06f492020-04-09 00:19:31 -070011823 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011824 TEST_REQUIRES_ARM_NEON;
11825 for (uint32_t n = 16; n <= 24; n += 8) {
11826 for (size_t k = 1; k <= 20; k += 5) {
11827 GemmMicrokernelTester()
11828 .mr(8)
11829 .nr(8)
11830 .kr(1)
11831 .sr(4)
11832 .m(8)
11833 .n(n)
11834 .k(k)
11835 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011836 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011837 }
11838 }
11839 }
11840
Marat Dukhande06f492020-04-09 00:19:31 -070011841 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011842 TEST_REQUIRES_ARM_NEON;
11843 for (uint32_t n = 16; n <= 24; n += 8) {
11844 for (size_t k = 1; k <= 20; k += 5) {
11845 for (uint32_t m = 1; m <= 8; m++) {
11846 GemmMicrokernelTester()
11847 .mr(8)
11848 .nr(8)
11849 .kr(1)
11850 .sr(4)
11851 .m(m)
11852 .n(n)
11853 .k(k)
11854 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011855 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011856 }
11857 }
11858 }
11859 }
11860
Marat Dukhande06f492020-04-09 00:19:31 -070011861 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011862 TEST_REQUIRES_ARM_NEON;
11863 for (size_t k = 1; k <= 20; k += 5) {
11864 GemmMicrokernelTester()
11865 .mr(8)
11866 .nr(8)
11867 .kr(1)
11868 .sr(4)
11869 .m(8)
11870 .n(8)
11871 .k(k)
11872 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011873 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011874 }
11875 }
11876
Marat Dukhande06f492020-04-09 00:19:31 -070011877 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011878 TEST_REQUIRES_ARM_NEON;
11879 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011880 for (uint32_t n = 1; n <= 8; n++) {
11881 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011882 GemmMicrokernelTester()
11883 .mr(8)
11884 .nr(8)
11885 .kr(1)
11886 .sr(4)
11887 .m(m)
11888 .n(n)
11889 .k(k)
11890 .ks(3)
11891 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011892 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011893 }
11894 }
11895 }
11896 }
11897
Marat Dukhande06f492020-04-09 00:19:31 -070011898 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011899 TEST_REQUIRES_ARM_NEON;
11900 for (uint32_t n = 9; n < 16; n++) {
11901 for (size_t k = 1; k <= 20; k += 5) {
11902 GemmMicrokernelTester()
11903 .mr(8)
11904 .nr(8)
11905 .kr(1)
11906 .sr(4)
11907 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011908 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011909 .k(k)
11910 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011911 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011912 }
11913 }
11914 }
11915
Marat Dukhande06f492020-04-09 00:19:31 -070011916 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011917 TEST_REQUIRES_ARM_NEON;
11918 for (uint32_t n = 16; n <= 24; n += 8) {
11919 for (size_t k = 1; k <= 20; k += 5) {
11920 GemmMicrokernelTester()
11921 .mr(8)
11922 .nr(8)
11923 .kr(1)
11924 .sr(4)
11925 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011926 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070011927 .k(k)
11928 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011929 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011930 }
11931 }
11932 }
11933
Marat Dukhande06f492020-04-09 00:19:31 -070011934 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011935 TEST_REQUIRES_ARM_NEON;
11936 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011937 for (uint32_t n = 1; n <= 8; n++) {
11938 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011939 GemmMicrokernelTester()
11940 .mr(8)
11941 .nr(8)
11942 .kr(1)
11943 .sr(4)
11944 .m(m)
11945 .n(n)
11946 .k(k)
11947 .cm_stride(11)
11948 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011949 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011950 }
11951 }
11952 }
11953 }
11954
Marat Dukhande06f492020-04-09 00:19:31 -070011955 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011956 TEST_REQUIRES_ARM_NEON;
11957 for (size_t k = 1; k <= 20; k += 5) {
11958 GemmMicrokernelTester()
11959 .mr(8)
11960 .nr(8)
11961 .kr(1)
11962 .sr(4)
11963 .m(8)
11964 .n(8)
11965 .k(k)
11966 .ks(3)
11967 .a_offset(163)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011968 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011969 }
11970 }
11971
Marat Dukhande06f492020-04-09 00:19:31 -070011972 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011973 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011974 for (size_t k = 1; k <= 20; k += 5) {
11975 for (uint32_t mz = 0; mz < 8; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011976 GemmMicrokernelTester()
11977 .mr(8)
11978 .nr(8)
11979 .kr(1)
11980 .sr(4)
11981 .m(8)
11982 .n(8)
11983 .k(k)
11984 .ks(3)
11985 .a_offset(163)
11986 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070011987 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070011988 }
11989 }
11990 }
11991
Marat Dukhande06f492020-04-09 00:19:31 -070011992 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070011993 TEST_REQUIRES_ARM_NEON;
11994 GemmMicrokernelTester()
11995 .mr(8)
11996 .nr(8)
11997 .kr(1)
11998 .sr(4)
11999 .m(8)
12000 .n(8)
12001 .k(4)
12002 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012003 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012004 }
12005
Marat Dukhande06f492020-04-09 00:19:31 -070012006 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012007 TEST_REQUIRES_ARM_NEON;
12008 GemmMicrokernelTester()
12009 .mr(8)
12010 .nr(8)
12011 .kr(1)
12012 .sr(4)
12013 .m(8)
12014 .n(8)
12015 .k(4)
12016 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012017 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012018 }
12019
Marat Dukhande06f492020-04-09 00:19:31 -070012020 TEST(F32_IGEMM_MINMAX_8X8S4__NEON, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012021 TEST_REQUIRES_ARM_NEON;
12022 GemmMicrokernelTester()
12023 .mr(8)
12024 .nr(8)
12025 .kr(1)
12026 .sr(4)
12027 .m(8)
12028 .n(8)
12029 .k(4)
12030 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012031 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neon, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012032 }
12033#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12034
12035
12036#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070012037 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012038 TEST_REQUIRES_ARM_NEON_FMA;
12039 GemmMicrokernelTester()
12040 .mr(1)
12041 .nr(8)
12042 .kr(1)
12043 .sr(4)
12044 .m(1)
12045 .n(8)
12046 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012047 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012048 }
12049
Marat Dukhande06f492020-04-09 00:19:31 -070012050 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012051 TEST_REQUIRES_ARM_NEON_FMA;
12052 GemmMicrokernelTester()
12053 .mr(1)
12054 .nr(8)
12055 .kr(1)
12056 .sr(4)
12057 .m(1)
12058 .n(8)
12059 .k(4)
12060 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012061 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012062 }
12063
Marat Dukhande06f492020-04-09 00:19:31 -070012064 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012065 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012066 for (uint32_t n = 1; n <= 8; n++) {
12067 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012068 GemmMicrokernelTester()
12069 .mr(1)
12070 .nr(8)
12071 .kr(1)
12072 .sr(4)
12073 .m(m)
12074 .n(n)
12075 .k(4)
12076 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012077 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012078 }
12079 }
12080 }
12081
Marat Dukhande06f492020-04-09 00:19:31 -070012082 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012083 TEST_REQUIRES_ARM_NEON_FMA;
12084 for (uint32_t m = 1; m <= 1; m++) {
12085 GemmMicrokernelTester()
12086 .mr(1)
12087 .nr(8)
12088 .kr(1)
12089 .sr(4)
12090 .m(m)
12091 .n(8)
12092 .k(4)
12093 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012094 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012095 }
12096 }
12097
Marat Dukhande06f492020-04-09 00:19:31 -070012098 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012099 TEST_REQUIRES_ARM_NEON_FMA;
12100 for (uint32_t n = 1; n <= 8; n++) {
12101 GemmMicrokernelTester()
12102 .mr(1)
12103 .nr(8)
12104 .kr(1)
12105 .sr(4)
12106 .m(1)
12107 .n(n)
12108 .k(4)
12109 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012110 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012111 }
12112 }
12113
Marat Dukhande06f492020-04-09 00:19:31 -070012114 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012115 TEST_REQUIRES_ARM_NEON_FMA;
12116 for (size_t k = 1; k < 4; k++) {
12117 GemmMicrokernelTester()
12118 .mr(1)
12119 .nr(8)
12120 .kr(1)
12121 .sr(4)
12122 .m(1)
12123 .n(8)
12124 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012125 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012126 }
12127 }
12128
Marat Dukhande06f492020-04-09 00:19:31 -070012129 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012130 TEST_REQUIRES_ARM_NEON_FMA;
12131 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012132 for (uint32_t n = 1; n <= 8; n++) {
12133 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012134 GemmMicrokernelTester()
12135 .mr(1)
12136 .nr(8)
12137 .kr(1)
12138 .sr(4)
12139 .m(m)
12140 .n(n)
12141 .k(k)
12142 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012143 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012144 }
12145 }
12146 }
12147 }
12148
Marat Dukhande06f492020-04-09 00:19:31 -070012149 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012150 TEST_REQUIRES_ARM_NEON_FMA;
12151 for (size_t k = 5; k < 8; k++) {
12152 GemmMicrokernelTester()
12153 .mr(1)
12154 .nr(8)
12155 .kr(1)
12156 .sr(4)
12157 .m(1)
12158 .n(8)
12159 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012160 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012161 }
12162 }
12163
Marat Dukhande06f492020-04-09 00:19:31 -070012164 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012165 TEST_REQUIRES_ARM_NEON_FMA;
12166 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012167 for (uint32_t n = 1; n <= 8; n++) {
12168 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012169 GemmMicrokernelTester()
12170 .mr(1)
12171 .nr(8)
12172 .kr(1)
12173 .sr(4)
12174 .m(m)
12175 .n(n)
12176 .k(k)
12177 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012178 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012179 }
12180 }
12181 }
12182 }
12183
Marat Dukhande06f492020-04-09 00:19:31 -070012184 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012185 TEST_REQUIRES_ARM_NEON_FMA;
12186 for (size_t k = 8; k <= 40; k += 4) {
12187 GemmMicrokernelTester()
12188 .mr(1)
12189 .nr(8)
12190 .kr(1)
12191 .sr(4)
12192 .m(1)
12193 .n(8)
12194 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012195 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012196 }
12197 }
12198
Marat Dukhande06f492020-04-09 00:19:31 -070012199 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012200 TEST_REQUIRES_ARM_NEON_FMA;
12201 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012202 for (uint32_t n = 1; n <= 8; n++) {
12203 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012204 GemmMicrokernelTester()
12205 .mr(1)
12206 .nr(8)
12207 .kr(1)
12208 .sr(4)
12209 .m(m)
12210 .n(n)
12211 .k(k)
12212 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012213 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012214 }
12215 }
12216 }
12217 }
12218
Marat Dukhande06f492020-04-09 00:19:31 -070012219 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012220 TEST_REQUIRES_ARM_NEON_FMA;
12221 for (uint32_t n = 9; n < 16; n++) {
12222 for (size_t k = 1; k <= 20; k += 5) {
12223 GemmMicrokernelTester()
12224 .mr(1)
12225 .nr(8)
12226 .kr(1)
12227 .sr(4)
12228 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012229 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012230 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012231 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012232 }
12233 }
12234 }
12235
Marat Dukhande06f492020-04-09 00:19:31 -070012236 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012237 TEST_REQUIRES_ARM_NEON_FMA;
12238 for (uint32_t n = 9; n < 16; n++) {
12239 for (size_t k = 1; k <= 20; k += 5) {
12240 GemmMicrokernelTester()
12241 .mr(1)
12242 .nr(8)
12243 .kr(1)
12244 .sr(4)
12245 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012246 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012247 .k(k)
12248 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012249 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012250 }
12251 }
12252 }
12253
Marat Dukhande06f492020-04-09 00:19:31 -070012254 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012255 TEST_REQUIRES_ARM_NEON_FMA;
12256 for (uint32_t n = 9; n < 16; n++) {
12257 for (size_t k = 1; k <= 20; k += 5) {
12258 for (uint32_t m = 1; m <= 1; m++) {
12259 GemmMicrokernelTester()
12260 .mr(1)
12261 .nr(8)
12262 .kr(1)
12263 .sr(4)
12264 .m(m)
12265 .n(n)
12266 .k(k)
12267 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012268 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012269 }
12270 }
12271 }
12272 }
12273
Marat Dukhande06f492020-04-09 00:19:31 -070012274 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012275 TEST_REQUIRES_ARM_NEON_FMA;
12276 for (uint32_t n = 16; n <= 24; n += 8) {
12277 for (size_t k = 1; k <= 20; k += 5) {
12278 GemmMicrokernelTester()
12279 .mr(1)
12280 .nr(8)
12281 .kr(1)
12282 .sr(4)
12283 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012284 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012285 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012286 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012287 }
12288 }
12289 }
12290
Marat Dukhande06f492020-04-09 00:19:31 -070012291 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012292 TEST_REQUIRES_ARM_NEON_FMA;
12293 for (uint32_t n = 16; n <= 24; n += 8) {
12294 for (size_t k = 1; k <= 20; k += 5) {
12295 GemmMicrokernelTester()
12296 .mr(1)
12297 .nr(8)
12298 .kr(1)
12299 .sr(4)
12300 .m(1)
12301 .n(n)
12302 .k(k)
12303 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012304 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012305 }
12306 }
12307 }
12308
Marat Dukhande06f492020-04-09 00:19:31 -070012309 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012310 TEST_REQUIRES_ARM_NEON_FMA;
12311 for (uint32_t n = 16; n <= 24; n += 8) {
12312 for (size_t k = 1; k <= 20; k += 5) {
12313 for (uint32_t m = 1; m <= 1; m++) {
12314 GemmMicrokernelTester()
12315 .mr(1)
12316 .nr(8)
12317 .kr(1)
12318 .sr(4)
12319 .m(m)
12320 .n(n)
12321 .k(k)
12322 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012323 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012324 }
12325 }
12326 }
12327 }
12328
Marat Dukhande06f492020-04-09 00:19:31 -070012329 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012330 TEST_REQUIRES_ARM_NEON_FMA;
12331 for (size_t k = 1; k <= 20; k += 5) {
12332 GemmMicrokernelTester()
12333 .mr(1)
12334 .nr(8)
12335 .kr(1)
12336 .sr(4)
12337 .m(1)
12338 .n(8)
12339 .k(k)
12340 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012341 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012342 }
12343 }
12344
Marat Dukhande06f492020-04-09 00:19:31 -070012345 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012346 TEST_REQUIRES_ARM_NEON_FMA;
12347 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012348 for (uint32_t n = 1; n <= 8; n++) {
12349 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012350 GemmMicrokernelTester()
12351 .mr(1)
12352 .nr(8)
12353 .kr(1)
12354 .sr(4)
12355 .m(m)
12356 .n(n)
12357 .k(k)
12358 .ks(3)
12359 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012360 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012361 }
12362 }
12363 }
12364 }
12365
Marat Dukhande06f492020-04-09 00:19:31 -070012366 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012367 TEST_REQUIRES_ARM_NEON_FMA;
12368 for (uint32_t n = 9; n < 16; n++) {
12369 for (size_t k = 1; k <= 20; k += 5) {
12370 GemmMicrokernelTester()
12371 .mr(1)
12372 .nr(8)
12373 .kr(1)
12374 .sr(4)
12375 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012376 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012377 .k(k)
12378 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012379 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012380 }
12381 }
12382 }
12383
Marat Dukhande06f492020-04-09 00:19:31 -070012384 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012385 TEST_REQUIRES_ARM_NEON_FMA;
12386 for (uint32_t n = 16; n <= 24; n += 8) {
12387 for (size_t k = 1; k <= 20; k += 5) {
12388 GemmMicrokernelTester()
12389 .mr(1)
12390 .nr(8)
12391 .kr(1)
12392 .sr(4)
12393 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012394 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012395 .k(k)
12396 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012397 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012398 }
12399 }
12400 }
12401
Marat Dukhande06f492020-04-09 00:19:31 -070012402 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012403 TEST_REQUIRES_ARM_NEON_FMA;
12404 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012405 for (uint32_t n = 1; n <= 8; n++) {
12406 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012407 GemmMicrokernelTester()
12408 .mr(1)
12409 .nr(8)
12410 .kr(1)
12411 .sr(4)
12412 .m(m)
12413 .n(n)
12414 .k(k)
12415 .cm_stride(11)
12416 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012417 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012418 }
12419 }
12420 }
12421 }
12422
Marat Dukhande06f492020-04-09 00:19:31 -070012423 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012424 TEST_REQUIRES_ARM_NEON_FMA;
12425 for (size_t k = 1; k <= 20; k += 5) {
12426 GemmMicrokernelTester()
12427 .mr(1)
12428 .nr(8)
12429 .kr(1)
12430 .sr(4)
12431 .m(1)
12432 .n(8)
12433 .k(k)
12434 .ks(3)
12435 .a_offset(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012436 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012437 }
12438 }
12439
Marat Dukhande06f492020-04-09 00:19:31 -070012440 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012441 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012442 for (size_t k = 1; k <= 20; k += 5) {
12443 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012444 GemmMicrokernelTester()
12445 .mr(1)
12446 .nr(8)
12447 .kr(1)
12448 .sr(4)
12449 .m(1)
12450 .n(8)
12451 .k(k)
12452 .ks(3)
12453 .a_offset(23)
12454 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012455 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012456 }
12457 }
12458 }
12459
Marat Dukhande06f492020-04-09 00:19:31 -070012460 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012461 TEST_REQUIRES_ARM_NEON_FMA;
12462 GemmMicrokernelTester()
12463 .mr(1)
12464 .nr(8)
12465 .kr(1)
12466 .sr(4)
12467 .m(1)
12468 .n(8)
12469 .k(4)
12470 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012471 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012472 }
12473
Marat Dukhande06f492020-04-09 00:19:31 -070012474 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012475 TEST_REQUIRES_ARM_NEON_FMA;
12476 GemmMicrokernelTester()
12477 .mr(1)
12478 .nr(8)
12479 .kr(1)
12480 .sr(4)
12481 .m(1)
12482 .n(8)
12483 .k(4)
12484 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012485 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012486 }
12487
Marat Dukhande06f492020-04-09 00:19:31 -070012488 TEST(F32_IGEMM_MINMAX_1X8S4__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012489 TEST_REQUIRES_ARM_NEON_FMA;
12490 GemmMicrokernelTester()
12491 .mr(1)
12492 .nr(8)
12493 .kr(1)
12494 .sr(4)
12495 .m(1)
12496 .n(8)
12497 .k(4)
12498 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012499 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012500 }
12501#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12502
12503
12504#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070012505 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012506 TEST_REQUIRES_ARM_NEON_FMA;
12507 GemmMicrokernelTester()
12508 .mr(6)
12509 .nr(8)
12510 .kr(1)
12511 .sr(4)
12512 .m(6)
12513 .n(8)
12514 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012515 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012516 }
12517
Marat Dukhande06f492020-04-09 00:19:31 -070012518 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012519 TEST_REQUIRES_ARM_NEON_FMA;
12520 GemmMicrokernelTester()
12521 .mr(6)
12522 .nr(8)
12523 .kr(1)
12524 .sr(4)
12525 .m(6)
12526 .n(8)
12527 .k(4)
12528 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012529 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012530 }
12531
Marat Dukhande06f492020-04-09 00:19:31 -070012532 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012533 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012534 for (uint32_t n = 1; n <= 8; n++) {
12535 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012536 GemmMicrokernelTester()
12537 .mr(6)
12538 .nr(8)
12539 .kr(1)
12540 .sr(4)
12541 .m(m)
12542 .n(n)
12543 .k(4)
12544 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012545 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012546 }
12547 }
12548 }
12549
Marat Dukhande06f492020-04-09 00:19:31 -070012550 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012551 TEST_REQUIRES_ARM_NEON_FMA;
12552 for (uint32_t m = 1; m <= 6; m++) {
12553 GemmMicrokernelTester()
12554 .mr(6)
12555 .nr(8)
12556 .kr(1)
12557 .sr(4)
12558 .m(m)
12559 .n(8)
12560 .k(4)
12561 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012562 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012563 }
12564 }
12565
Marat Dukhande06f492020-04-09 00:19:31 -070012566 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012567 TEST_REQUIRES_ARM_NEON_FMA;
12568 for (uint32_t n = 1; n <= 8; n++) {
12569 GemmMicrokernelTester()
12570 .mr(6)
12571 .nr(8)
12572 .kr(1)
12573 .sr(4)
12574 .m(6)
12575 .n(n)
12576 .k(4)
12577 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012578 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012579 }
12580 }
12581
Marat Dukhande06f492020-04-09 00:19:31 -070012582 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012583 TEST_REQUIRES_ARM_NEON_FMA;
12584 for (size_t k = 1; k < 4; k++) {
12585 GemmMicrokernelTester()
12586 .mr(6)
12587 .nr(8)
12588 .kr(1)
12589 .sr(4)
12590 .m(6)
12591 .n(8)
12592 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012593 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012594 }
12595 }
12596
Marat Dukhande06f492020-04-09 00:19:31 -070012597 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012598 TEST_REQUIRES_ARM_NEON_FMA;
12599 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012600 for (uint32_t n = 1; n <= 8; n++) {
12601 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012602 GemmMicrokernelTester()
12603 .mr(6)
12604 .nr(8)
12605 .kr(1)
12606 .sr(4)
12607 .m(m)
12608 .n(n)
12609 .k(k)
12610 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012611 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012612 }
12613 }
12614 }
12615 }
12616
Marat Dukhande06f492020-04-09 00:19:31 -070012617 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012618 TEST_REQUIRES_ARM_NEON_FMA;
12619 for (size_t k = 5; k < 8; k++) {
12620 GemmMicrokernelTester()
12621 .mr(6)
12622 .nr(8)
12623 .kr(1)
12624 .sr(4)
12625 .m(6)
12626 .n(8)
12627 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012628 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012629 }
12630 }
12631
Marat Dukhande06f492020-04-09 00:19:31 -070012632 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012633 TEST_REQUIRES_ARM_NEON_FMA;
12634 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012635 for (uint32_t n = 1; n <= 8; n++) {
12636 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012637 GemmMicrokernelTester()
12638 .mr(6)
12639 .nr(8)
12640 .kr(1)
12641 .sr(4)
12642 .m(m)
12643 .n(n)
12644 .k(k)
12645 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012646 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012647 }
12648 }
12649 }
12650 }
12651
Marat Dukhande06f492020-04-09 00:19:31 -070012652 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012653 TEST_REQUIRES_ARM_NEON_FMA;
12654 for (size_t k = 8; k <= 40; k += 4) {
12655 GemmMicrokernelTester()
12656 .mr(6)
12657 .nr(8)
12658 .kr(1)
12659 .sr(4)
12660 .m(6)
12661 .n(8)
12662 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012663 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012664 }
12665 }
12666
Marat Dukhande06f492020-04-09 00:19:31 -070012667 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012668 TEST_REQUIRES_ARM_NEON_FMA;
12669 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012670 for (uint32_t n = 1; n <= 8; n++) {
12671 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012672 GemmMicrokernelTester()
12673 .mr(6)
12674 .nr(8)
12675 .kr(1)
12676 .sr(4)
12677 .m(m)
12678 .n(n)
12679 .k(k)
12680 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012681 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012682 }
12683 }
12684 }
12685 }
12686
Marat Dukhande06f492020-04-09 00:19:31 -070012687 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012688 TEST_REQUIRES_ARM_NEON_FMA;
12689 for (uint32_t n = 9; n < 16; n++) {
12690 for (size_t k = 1; k <= 20; k += 5) {
12691 GemmMicrokernelTester()
12692 .mr(6)
12693 .nr(8)
12694 .kr(1)
12695 .sr(4)
12696 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012697 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012698 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012699 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012700 }
12701 }
12702 }
12703
Marat Dukhande06f492020-04-09 00:19:31 -070012704 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012705 TEST_REQUIRES_ARM_NEON_FMA;
12706 for (uint32_t n = 9; n < 16; n++) {
12707 for (size_t k = 1; k <= 20; k += 5) {
12708 GemmMicrokernelTester()
12709 .mr(6)
12710 .nr(8)
12711 .kr(1)
12712 .sr(4)
12713 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012714 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012715 .k(k)
12716 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012717 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012718 }
12719 }
12720 }
12721
Marat Dukhande06f492020-04-09 00:19:31 -070012722 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012723 TEST_REQUIRES_ARM_NEON_FMA;
12724 for (uint32_t n = 9; n < 16; n++) {
12725 for (size_t k = 1; k <= 20; k += 5) {
12726 for (uint32_t m = 1; m <= 6; m++) {
12727 GemmMicrokernelTester()
12728 .mr(6)
12729 .nr(8)
12730 .kr(1)
12731 .sr(4)
12732 .m(m)
12733 .n(n)
12734 .k(k)
12735 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012736 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012737 }
12738 }
12739 }
12740 }
12741
Marat Dukhande06f492020-04-09 00:19:31 -070012742 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012743 TEST_REQUIRES_ARM_NEON_FMA;
12744 for (uint32_t n = 16; n <= 24; n += 8) {
12745 for (size_t k = 1; k <= 20; k += 5) {
12746 GemmMicrokernelTester()
12747 .mr(6)
12748 .nr(8)
12749 .kr(1)
12750 .sr(4)
12751 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012752 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012753 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012754 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012755 }
12756 }
12757 }
12758
Marat Dukhande06f492020-04-09 00:19:31 -070012759 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012760 TEST_REQUIRES_ARM_NEON_FMA;
12761 for (uint32_t n = 16; n <= 24; n += 8) {
12762 for (size_t k = 1; k <= 20; k += 5) {
12763 GemmMicrokernelTester()
12764 .mr(6)
12765 .nr(8)
12766 .kr(1)
12767 .sr(4)
12768 .m(6)
12769 .n(n)
12770 .k(k)
12771 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012772 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012773 }
12774 }
12775 }
12776
Marat Dukhande06f492020-04-09 00:19:31 -070012777 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012778 TEST_REQUIRES_ARM_NEON_FMA;
12779 for (uint32_t n = 16; n <= 24; n += 8) {
12780 for (size_t k = 1; k <= 20; k += 5) {
12781 for (uint32_t m = 1; m <= 6; m++) {
12782 GemmMicrokernelTester()
12783 .mr(6)
12784 .nr(8)
12785 .kr(1)
12786 .sr(4)
12787 .m(m)
12788 .n(n)
12789 .k(k)
12790 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012791 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012792 }
12793 }
12794 }
12795 }
12796
Marat Dukhande06f492020-04-09 00:19:31 -070012797 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012798 TEST_REQUIRES_ARM_NEON_FMA;
12799 for (size_t k = 1; k <= 20; k += 5) {
12800 GemmMicrokernelTester()
12801 .mr(6)
12802 .nr(8)
12803 .kr(1)
12804 .sr(4)
12805 .m(6)
12806 .n(8)
12807 .k(k)
12808 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012809 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012810 }
12811 }
12812
Marat Dukhande06f492020-04-09 00:19:31 -070012813 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012814 TEST_REQUIRES_ARM_NEON_FMA;
12815 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012816 for (uint32_t n = 1; n <= 8; n++) {
12817 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012818 GemmMicrokernelTester()
12819 .mr(6)
12820 .nr(8)
12821 .kr(1)
12822 .sr(4)
12823 .m(m)
12824 .n(n)
12825 .k(k)
12826 .ks(3)
12827 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012828 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012829 }
12830 }
12831 }
12832 }
12833
Marat Dukhande06f492020-04-09 00:19:31 -070012834 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012835 TEST_REQUIRES_ARM_NEON_FMA;
12836 for (uint32_t n = 9; n < 16; n++) {
12837 for (size_t k = 1; k <= 20; k += 5) {
12838 GemmMicrokernelTester()
12839 .mr(6)
12840 .nr(8)
12841 .kr(1)
12842 .sr(4)
12843 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012844 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012845 .k(k)
12846 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012847 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012848 }
12849 }
12850 }
12851
Marat Dukhande06f492020-04-09 00:19:31 -070012852 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012853 TEST_REQUIRES_ARM_NEON_FMA;
12854 for (uint32_t n = 16; n <= 24; n += 8) {
12855 for (size_t k = 1; k <= 20; k += 5) {
12856 GemmMicrokernelTester()
12857 .mr(6)
12858 .nr(8)
12859 .kr(1)
12860 .sr(4)
12861 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012862 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070012863 .k(k)
12864 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012865 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012866 }
12867 }
12868 }
12869
Marat Dukhande06f492020-04-09 00:19:31 -070012870 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012871 TEST_REQUIRES_ARM_NEON_FMA;
12872 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012873 for (uint32_t n = 1; n <= 8; n++) {
12874 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012875 GemmMicrokernelTester()
12876 .mr(6)
12877 .nr(8)
12878 .kr(1)
12879 .sr(4)
12880 .m(m)
12881 .n(n)
12882 .k(k)
12883 .cm_stride(11)
12884 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012885 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012886 }
12887 }
12888 }
12889 }
12890
Marat Dukhande06f492020-04-09 00:19:31 -070012891 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012892 TEST_REQUIRES_ARM_NEON_FMA;
12893 for (size_t k = 1; k <= 20; k += 5) {
12894 GemmMicrokernelTester()
12895 .mr(6)
12896 .nr(8)
12897 .kr(1)
12898 .sr(4)
12899 .m(6)
12900 .n(8)
12901 .k(k)
12902 .ks(3)
12903 .a_offset(127)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012904 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012905 }
12906 }
12907
Marat Dukhande06f492020-04-09 00:19:31 -070012908 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012909 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012910 for (size_t k = 1; k <= 20; k += 5) {
12911 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012912 GemmMicrokernelTester()
12913 .mr(6)
12914 .nr(8)
12915 .kr(1)
12916 .sr(4)
12917 .m(6)
12918 .n(8)
12919 .k(k)
12920 .ks(3)
12921 .a_offset(127)
12922 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012923 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012924 }
12925 }
12926 }
12927
Marat Dukhande06f492020-04-09 00:19:31 -070012928 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012929 TEST_REQUIRES_ARM_NEON_FMA;
12930 GemmMicrokernelTester()
12931 .mr(6)
12932 .nr(8)
12933 .kr(1)
12934 .sr(4)
12935 .m(6)
12936 .n(8)
12937 .k(4)
12938 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012939 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012940 }
12941
Marat Dukhande06f492020-04-09 00:19:31 -070012942 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012943 TEST_REQUIRES_ARM_NEON_FMA;
12944 GemmMicrokernelTester()
12945 .mr(6)
12946 .nr(8)
12947 .kr(1)
12948 .sr(4)
12949 .m(6)
12950 .n(8)
12951 .k(4)
12952 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012953 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012954 }
12955
Marat Dukhande06f492020-04-09 00:19:31 -070012956 TEST(F32_IGEMM_MINMAX_6X8S4__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012957 TEST_REQUIRES_ARM_NEON_FMA;
12958 GemmMicrokernelTester()
12959 .mr(6)
12960 .nr(8)
12961 .kr(1)
12962 .sr(4)
12963 .m(6)
12964 .n(8)
12965 .k(4)
12966 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012967 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012968 }
12969#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12970
12971
12972#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhande06f492020-04-09 00:19:31 -070012973 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_eq_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012974 TEST_REQUIRES_ARM_NEON_FMA;
12975 GemmMicrokernelTester()
12976 .mr(8)
12977 .nr(8)
12978 .kr(1)
12979 .sr(4)
12980 .m(8)
12981 .n(8)
12982 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012983 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012984 }
12985
Marat Dukhande06f492020-04-09 00:19:31 -070012986 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070012987 TEST_REQUIRES_ARM_NEON_FMA;
12988 GemmMicrokernelTester()
12989 .mr(8)
12990 .nr(8)
12991 .kr(1)
12992 .sr(4)
12993 .m(8)
12994 .n(8)
12995 .k(4)
12996 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070012997 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070012998 }
12999
Marat Dukhande06f492020-04-09 00:19:31 -070013000 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013001 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013002 for (uint32_t n = 1; n <= 8; n++) {
13003 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013004 GemmMicrokernelTester()
13005 .mr(8)
13006 .nr(8)
13007 .kr(1)
13008 .sr(4)
13009 .m(m)
13010 .n(n)
13011 .k(4)
13012 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013013 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013014 }
13015 }
13016 }
13017
Marat Dukhande06f492020-04-09 00:19:31 -070013018 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013019 TEST_REQUIRES_ARM_NEON_FMA;
13020 for (uint32_t m = 1; m <= 8; m++) {
13021 GemmMicrokernelTester()
13022 .mr(8)
13023 .nr(8)
13024 .kr(1)
13025 .sr(4)
13026 .m(m)
13027 .n(8)
13028 .k(4)
13029 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013030 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013031 }
13032 }
13033
Marat Dukhande06f492020-04-09 00:19:31 -070013034 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_eq_4_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013035 TEST_REQUIRES_ARM_NEON_FMA;
13036 for (uint32_t n = 1; n <= 8; n++) {
13037 GemmMicrokernelTester()
13038 .mr(8)
13039 .nr(8)
13040 .kr(1)
13041 .sr(4)
13042 .m(8)
13043 .n(n)
13044 .k(4)
13045 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013046 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013047 }
13048 }
13049
Marat Dukhande06f492020-04-09 00:19:31 -070013050 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_lt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013051 TEST_REQUIRES_ARM_NEON_FMA;
13052 for (size_t k = 1; k < 4; k++) {
13053 GemmMicrokernelTester()
13054 .mr(8)
13055 .nr(8)
13056 .kr(1)
13057 .sr(4)
13058 .m(8)
13059 .n(8)
13060 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013061 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013062 }
13063 }
13064
Marat Dukhande06f492020-04-09 00:19:31 -070013065 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_lt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013066 TEST_REQUIRES_ARM_NEON_FMA;
13067 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013068 for (uint32_t n = 1; n <= 8; n++) {
13069 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013070 GemmMicrokernelTester()
13071 .mr(8)
13072 .nr(8)
13073 .kr(1)
13074 .sr(4)
13075 .m(m)
13076 .n(n)
13077 .k(k)
13078 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013079 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013080 }
13081 }
13082 }
13083 }
13084
Marat Dukhande06f492020-04-09 00:19:31 -070013085 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013086 TEST_REQUIRES_ARM_NEON_FMA;
13087 for (size_t k = 5; k < 8; k++) {
13088 GemmMicrokernelTester()
13089 .mr(8)
13090 .nr(8)
13091 .kr(1)
13092 .sr(4)
13093 .m(8)
13094 .n(8)
13095 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013096 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013097 }
13098 }
13099
Marat Dukhande06f492020-04-09 00:19:31 -070013100 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013101 TEST_REQUIRES_ARM_NEON_FMA;
13102 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013103 for (uint32_t n = 1; n <= 8; n++) {
13104 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013105 GemmMicrokernelTester()
13106 .mr(8)
13107 .nr(8)
13108 .kr(1)
13109 .sr(4)
13110 .m(m)
13111 .n(n)
13112 .k(k)
13113 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013114 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013115 }
13116 }
13117 }
13118 }
13119
Marat Dukhande06f492020-04-09 00:19:31 -070013120 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013121 TEST_REQUIRES_ARM_NEON_FMA;
13122 for (size_t k = 8; k <= 40; k += 4) {
13123 GemmMicrokernelTester()
13124 .mr(8)
13125 .nr(8)
13126 .kr(1)
13127 .sr(4)
13128 .m(8)
13129 .n(8)
13130 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013131 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013132 }
13133 }
13134
Marat Dukhande06f492020-04-09 00:19:31 -070013135 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, k_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013136 TEST_REQUIRES_ARM_NEON_FMA;
13137 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013138 for (uint32_t n = 1; n <= 8; n++) {
13139 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013140 GemmMicrokernelTester()
13141 .mr(8)
13142 .nr(8)
13143 .kr(1)
13144 .sr(4)
13145 .m(m)
13146 .n(n)
13147 .k(k)
13148 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013149 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013150 }
13151 }
13152 }
13153 }
13154
Marat Dukhande06f492020-04-09 00:19:31 -070013155 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013156 TEST_REQUIRES_ARM_NEON_FMA;
13157 for (uint32_t n = 9; n < 16; n++) {
13158 for (size_t k = 1; k <= 20; k += 5) {
13159 GemmMicrokernelTester()
13160 .mr(8)
13161 .nr(8)
13162 .kr(1)
13163 .sr(4)
13164 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013165 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013166 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013167 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013168 }
13169 }
13170 }
13171
Marat Dukhande06f492020-04-09 00:19:31 -070013172 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013173 TEST_REQUIRES_ARM_NEON_FMA;
13174 for (uint32_t n = 9; n < 16; n++) {
13175 for (size_t k = 1; k <= 20; k += 5) {
13176 GemmMicrokernelTester()
13177 .mr(8)
13178 .nr(8)
13179 .kr(1)
13180 .sr(4)
13181 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013182 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013183 .k(k)
13184 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013185 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013186 }
13187 }
13188 }
13189
Marat Dukhande06f492020-04-09 00:19:31 -070013190 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013191 TEST_REQUIRES_ARM_NEON_FMA;
13192 for (uint32_t n = 9; n < 16; n++) {
13193 for (size_t k = 1; k <= 20; k += 5) {
13194 for (uint32_t m = 1; m <= 8; m++) {
13195 GemmMicrokernelTester()
13196 .mr(8)
13197 .nr(8)
13198 .kr(1)
13199 .sr(4)
13200 .m(m)
13201 .n(n)
13202 .k(k)
13203 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013204 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013205 }
13206 }
13207 }
13208 }
13209
Marat Dukhande06f492020-04-09 00:19:31 -070013210 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013211 TEST_REQUIRES_ARM_NEON_FMA;
13212 for (uint32_t n = 16; n <= 24; n += 8) {
13213 for (size_t k = 1; k <= 20; k += 5) {
13214 GemmMicrokernelTester()
13215 .mr(8)
13216 .nr(8)
13217 .kr(1)
13218 .sr(4)
13219 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013220 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013221 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013222 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013223 }
13224 }
13225 }
13226
Marat Dukhande06f492020-04-09 00:19:31 -070013227 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013228 TEST_REQUIRES_ARM_NEON_FMA;
13229 for (uint32_t n = 16; n <= 24; n += 8) {
13230 for (size_t k = 1; k <= 20; k += 5) {
13231 GemmMicrokernelTester()
13232 .mr(8)
13233 .nr(8)
13234 .kr(1)
13235 .sr(4)
13236 .m(8)
13237 .n(n)
13238 .k(k)
13239 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013240 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013241 }
13242 }
13243 }
13244
Marat Dukhande06f492020-04-09 00:19:31 -070013245 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013246 TEST_REQUIRES_ARM_NEON_FMA;
13247 for (uint32_t n = 16; n <= 24; n += 8) {
13248 for (size_t k = 1; k <= 20; k += 5) {
13249 for (uint32_t m = 1; m <= 8; m++) {
13250 GemmMicrokernelTester()
13251 .mr(8)
13252 .nr(8)
13253 .kr(1)
13254 .sr(4)
13255 .m(m)
13256 .n(n)
13257 .k(k)
13258 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013259 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013260 }
13261 }
13262 }
13263 }
13264
Marat Dukhande06f492020-04-09 00:19:31 -070013265 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013266 TEST_REQUIRES_ARM_NEON_FMA;
13267 for (size_t k = 1; k <= 20; k += 5) {
13268 GemmMicrokernelTester()
13269 .mr(8)
13270 .nr(8)
13271 .kr(1)
13272 .sr(4)
13273 .m(8)
13274 .n(8)
13275 .k(k)
13276 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013277 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013278 }
13279 }
13280
Marat Dukhande06f492020-04-09 00:19:31 -070013281 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013282 TEST_REQUIRES_ARM_NEON_FMA;
13283 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013284 for (uint32_t n = 1; n <= 8; n++) {
13285 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013286 GemmMicrokernelTester()
13287 .mr(8)
13288 .nr(8)
13289 .kr(1)
13290 .sr(4)
13291 .m(m)
13292 .n(n)
13293 .k(k)
13294 .ks(3)
13295 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013296 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013297 }
13298 }
13299 }
13300 }
13301
Marat Dukhande06f492020-04-09 00:19:31 -070013302 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013303 TEST_REQUIRES_ARM_NEON_FMA;
13304 for (uint32_t n = 9; n < 16; n++) {
13305 for (size_t k = 1; k <= 20; k += 5) {
13306 GemmMicrokernelTester()
13307 .mr(8)
13308 .nr(8)
13309 .kr(1)
13310 .sr(4)
13311 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013312 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013313 .k(k)
13314 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013315 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013316 }
13317 }
13318 }
13319
Marat Dukhande06f492020-04-09 00:19:31 -070013320 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013321 TEST_REQUIRES_ARM_NEON_FMA;
13322 for (uint32_t n = 16; n <= 24; n += 8) {
13323 for (size_t k = 1; k <= 20; k += 5) {
13324 GemmMicrokernelTester()
13325 .mr(8)
13326 .nr(8)
13327 .kr(1)
13328 .sr(4)
13329 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013330 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013331 .k(k)
13332 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013333 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013334 }
13335 }
13336 }
13337
Marat Dukhande06f492020-04-09 00:19:31 -070013338 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013339 TEST_REQUIRES_ARM_NEON_FMA;
13340 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013341 for (uint32_t n = 1; n <= 8; n++) {
13342 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013343 GemmMicrokernelTester()
13344 .mr(8)
13345 .nr(8)
13346 .kr(1)
13347 .sr(4)
13348 .m(m)
13349 .n(n)
13350 .k(k)
13351 .cm_stride(11)
13352 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013353 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013354 }
13355 }
13356 }
13357 }
13358
Marat Dukhande06f492020-04-09 00:19:31 -070013359 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013360 TEST_REQUIRES_ARM_NEON_FMA;
13361 for (size_t k = 1; k <= 20; k += 5) {
13362 GemmMicrokernelTester()
13363 .mr(8)
13364 .nr(8)
13365 .kr(1)
13366 .sr(4)
13367 .m(8)
13368 .n(8)
13369 .k(k)
13370 .ks(3)
13371 .a_offset(163)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013372 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013373 }
13374 }
13375
Marat Dukhande06f492020-04-09 00:19:31 -070013376 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013377 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013378 for (size_t k = 1; k <= 20; k += 5) {
13379 for (uint32_t mz = 0; mz < 8; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013380 GemmMicrokernelTester()
13381 .mr(8)
13382 .nr(8)
13383 .kr(1)
13384 .sr(4)
13385 .m(8)
13386 .n(8)
13387 .k(k)
13388 .ks(3)
13389 .a_offset(163)
13390 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013391 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013392 }
13393 }
13394 }
13395
Marat Dukhande06f492020-04-09 00:19:31 -070013396 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013397 TEST_REQUIRES_ARM_NEON_FMA;
13398 GemmMicrokernelTester()
13399 .mr(8)
13400 .nr(8)
13401 .kr(1)
13402 .sr(4)
13403 .m(8)
13404 .n(8)
13405 .k(4)
13406 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013407 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013408 }
13409
Marat Dukhande06f492020-04-09 00:19:31 -070013410 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013411 TEST_REQUIRES_ARM_NEON_FMA;
13412 GemmMicrokernelTester()
13413 .mr(8)
13414 .nr(8)
13415 .kr(1)
13416 .sr(4)
13417 .m(8)
13418 .n(8)
13419 .k(4)
13420 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013421 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013422 }
13423
Marat Dukhande06f492020-04-09 00:19:31 -070013424 TEST(F32_IGEMM_MINMAX_8X8S4__NEONFMA, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013425 TEST_REQUIRES_ARM_NEON_FMA;
13426 GemmMicrokernelTester()
13427 .mr(8)
13428 .nr(8)
13429 .kr(1)
13430 .sr(4)
13431 .m(8)
13432 .n(8)
13433 .k(4)
13434 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013435 .Test(xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013436 }
13437#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13438
13439
13440#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080013441 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1) {
13442 TEST_REQUIRES_X86_SSE;
13443 GemmMicrokernelTester()
13444 .mr(3)
13445 .nr(8)
13446 .kr(1)
13447 .sr(1)
13448 .m(3)
13449 .n(8)
13450 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013451 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013452 }
13453
13454 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, strided_cn) {
13455 TEST_REQUIRES_X86_SSE;
13456 GemmMicrokernelTester()
13457 .mr(3)
13458 .nr(8)
13459 .kr(1)
13460 .sr(1)
13461 .m(3)
13462 .n(8)
13463 .k(1)
13464 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013465 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013466 }
13467
13468 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile) {
13469 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013470 for (uint32_t n = 1; n <= 8; n++) {
13471 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013472 GemmMicrokernelTester()
13473 .mr(3)
13474 .nr(8)
13475 .kr(1)
13476 .sr(1)
13477 .m(m)
13478 .n(n)
13479 .k(1)
13480 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013481 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013482 }
13483 }
13484 }
13485
13486 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile_m) {
13487 TEST_REQUIRES_X86_SSE;
13488 for (uint32_t m = 1; m <= 3; m++) {
13489 GemmMicrokernelTester()
13490 .mr(3)
13491 .nr(8)
13492 .kr(1)
13493 .sr(1)
13494 .m(m)
13495 .n(8)
13496 .k(1)
13497 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013498 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013499 }
13500 }
13501
13502 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_eq_1_subtile_n) {
13503 TEST_REQUIRES_X86_SSE;
13504 for (uint32_t n = 1; n <= 8; n++) {
13505 GemmMicrokernelTester()
13506 .mr(3)
13507 .nr(8)
13508 .kr(1)
13509 .sr(1)
13510 .m(3)
13511 .n(n)
13512 .k(1)
13513 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013514 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013515 }
13516 }
13517
13518 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_gt_1) {
13519 TEST_REQUIRES_X86_SSE;
13520 for (size_t k = 2; k < 10; k++) {
13521 GemmMicrokernelTester()
13522 .mr(3)
13523 .nr(8)
13524 .kr(1)
13525 .sr(1)
13526 .m(3)
13527 .n(8)
13528 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013529 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013530 }
13531 }
13532
13533 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, k_gt_1_subtile) {
13534 TEST_REQUIRES_X86_SSE;
13535 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013536 for (uint32_t n = 1; n <= 8; n++) {
13537 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013538 GemmMicrokernelTester()
13539 .mr(3)
13540 .nr(8)
13541 .kr(1)
13542 .sr(1)
13543 .m(m)
13544 .n(n)
13545 .k(k)
13546 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013547 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013548 }
13549 }
13550 }
13551 }
13552
13553 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8) {
13554 TEST_REQUIRES_X86_SSE;
13555 for (uint32_t n = 9; n < 16; n++) {
13556 for (size_t k = 1; k <= 5; k += 2) {
13557 GemmMicrokernelTester()
13558 .mr(3)
13559 .nr(8)
13560 .kr(1)
13561 .sr(1)
13562 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013563 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080013564 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013565 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013566 }
13567 }
13568 }
13569
13570 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8_strided_cn) {
13571 TEST_REQUIRES_X86_SSE;
13572 for (uint32_t n = 9; n < 16; n++) {
13573 for (size_t k = 1; k <= 5; k += 2) {
13574 GemmMicrokernelTester()
13575 .mr(3)
13576 .nr(8)
13577 .kr(1)
13578 .sr(1)
13579 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013580 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080013581 .k(k)
13582 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013583 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013584 }
13585 }
13586 }
13587
13588 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8_subtile) {
13589 TEST_REQUIRES_X86_SSE;
13590 for (uint32_t n = 9; n < 16; n++) {
13591 for (size_t k = 1; k <= 5; k += 2) {
13592 for (uint32_t m = 1; m <= 3; m++) {
13593 GemmMicrokernelTester()
13594 .mr(3)
13595 .nr(8)
13596 .kr(1)
13597 .sr(1)
13598 .m(m)
13599 .n(n)
13600 .k(k)
13601 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013602 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013603 }
13604 }
13605 }
13606 }
13607
13608 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_div_8) {
13609 TEST_REQUIRES_X86_SSE;
13610 for (uint32_t n = 16; n <= 24; n += 8) {
13611 for (size_t k = 1; k <= 5; k += 2) {
13612 GemmMicrokernelTester()
13613 .mr(3)
13614 .nr(8)
13615 .kr(1)
13616 .sr(1)
13617 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013618 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080013619 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013620 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013621 }
13622 }
13623 }
13624
13625 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_div_8_strided_cn) {
13626 TEST_REQUIRES_X86_SSE;
13627 for (uint32_t n = 16; n <= 24; n += 8) {
13628 for (size_t k = 1; k <= 5; k += 2) {
13629 GemmMicrokernelTester()
13630 .mr(3)
13631 .nr(8)
13632 .kr(1)
13633 .sr(1)
13634 .m(3)
13635 .n(n)
13636 .k(k)
13637 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013638 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013639 }
13640 }
13641 }
13642
13643 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_div_8_subtile) {
13644 TEST_REQUIRES_X86_SSE;
13645 for (uint32_t n = 16; n <= 24; n += 8) {
13646 for (size_t k = 1; k <= 5; k += 2) {
13647 for (uint32_t m = 1; m <= 3; m++) {
13648 GemmMicrokernelTester()
13649 .mr(3)
13650 .nr(8)
13651 .kr(1)
13652 .sr(1)
13653 .m(m)
13654 .n(n)
13655 .k(k)
13656 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013657 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013658 }
13659 }
13660 }
13661 }
13662
13663 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, small_kernel) {
13664 TEST_REQUIRES_X86_SSE;
13665 for (size_t k = 1; k <= 5; k += 2) {
13666 GemmMicrokernelTester()
13667 .mr(3)
13668 .nr(8)
13669 .kr(1)
13670 .sr(1)
13671 .m(3)
13672 .n(8)
13673 .k(k)
13674 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013675 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013676 }
13677 }
13678
13679 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, small_kernel_subtile) {
13680 TEST_REQUIRES_X86_SSE;
13681 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013682 for (uint32_t n = 1; n <= 8; n++) {
13683 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013684 GemmMicrokernelTester()
13685 .mr(3)
13686 .nr(8)
13687 .kr(1)
13688 .sr(1)
13689 .m(m)
13690 .n(n)
13691 .k(k)
13692 .ks(3)
13693 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013694 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013695 }
13696 }
13697 }
13698 }
13699
13700 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_gt_8_small_kernel) {
13701 TEST_REQUIRES_X86_SSE;
13702 for (uint32_t n = 9; n < 16; n++) {
13703 for (size_t k = 1; k <= 5; k += 2) {
13704 GemmMicrokernelTester()
13705 .mr(3)
13706 .nr(8)
13707 .kr(1)
13708 .sr(1)
13709 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013710 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080013711 .k(k)
13712 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013713 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013714 }
13715 }
13716 }
13717
13718 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, n_div_8_small_kernel) {
13719 TEST_REQUIRES_X86_SSE;
13720 for (uint32_t n = 16; n <= 24; n += 8) {
13721 for (size_t k = 1; k <= 5; k += 2) {
13722 GemmMicrokernelTester()
13723 .mr(3)
13724 .nr(8)
13725 .kr(1)
13726 .sr(1)
13727 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013728 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080013729 .k(k)
13730 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013731 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013732 }
13733 }
13734 }
13735
13736 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, strided_cm_subtile) {
13737 TEST_REQUIRES_X86_SSE;
13738 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013739 for (uint32_t n = 1; n <= 8; n++) {
13740 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013741 GemmMicrokernelTester()
13742 .mr(3)
13743 .nr(8)
13744 .kr(1)
13745 .sr(1)
13746 .m(m)
13747 .n(n)
13748 .k(k)
13749 .cm_stride(11)
13750 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013751 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013752 }
13753 }
13754 }
13755 }
13756
13757 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, a_offset) {
13758 TEST_REQUIRES_X86_SSE;
13759 for (size_t k = 1; k <= 5; k += 2) {
13760 GemmMicrokernelTester()
13761 .mr(3)
13762 .nr(8)
13763 .kr(1)
13764 .sr(1)
13765 .m(3)
13766 .n(8)
13767 .k(k)
13768 .ks(3)
13769 .a_offset(17)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013770 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013771 }
13772 }
13773
13774 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, zero) {
13775 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013776 for (size_t k = 1; k <= 5; k += 2) {
13777 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080013778 GemmMicrokernelTester()
13779 .mr(3)
13780 .nr(8)
13781 .kr(1)
13782 .sr(1)
13783 .m(3)
13784 .n(8)
13785 .k(k)
13786 .ks(3)
13787 .a_offset(17)
13788 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013789 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013790 }
13791 }
13792 }
13793
13794 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, qmin) {
13795 TEST_REQUIRES_X86_SSE;
13796 GemmMicrokernelTester()
13797 .mr(3)
13798 .nr(8)
13799 .kr(1)
13800 .sr(1)
13801 .m(3)
13802 .n(8)
13803 .k(1)
13804 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013805 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013806 }
13807
13808 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, qmax) {
13809 TEST_REQUIRES_X86_SSE;
13810 GemmMicrokernelTester()
13811 .mr(3)
13812 .nr(8)
13813 .kr(1)
13814 .sr(1)
13815 .m(3)
13816 .n(8)
13817 .k(1)
13818 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013819 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013820 }
13821
13822 TEST(F32_IGEMM_MINMAX_3X8__SSE_LOAD1, strided_cm) {
13823 TEST_REQUIRES_X86_SSE;
13824 GemmMicrokernelTester()
13825 .mr(3)
13826 .nr(8)
13827 .kr(1)
13828 .sr(1)
13829 .m(3)
13830 .n(8)
13831 .k(1)
13832 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013833 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080013834 }
13835#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13836
13837
13838#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070013839 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013840 TEST_REQUIRES_X86_SSE;
13841 GemmMicrokernelTester()
13842 .mr(4)
13843 .nr(8)
13844 .kr(1)
13845 .sr(1)
13846 .m(4)
13847 .n(8)
13848 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013849 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013850 }
13851
Marat Dukhande06f492020-04-09 00:19:31 -070013852 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013853 TEST_REQUIRES_X86_SSE;
13854 GemmMicrokernelTester()
13855 .mr(4)
13856 .nr(8)
13857 .kr(1)
13858 .sr(1)
13859 .m(4)
13860 .n(8)
13861 .k(1)
13862 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013863 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013864 }
13865
Marat Dukhande06f492020-04-09 00:19:31 -070013866 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013867 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013868 for (uint32_t n = 1; n <= 8; n++) {
13869 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013870 GemmMicrokernelTester()
13871 .mr(4)
13872 .nr(8)
13873 .kr(1)
13874 .sr(1)
13875 .m(m)
13876 .n(n)
13877 .k(1)
13878 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013879 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013880 }
13881 }
13882 }
13883
Marat Dukhande06f492020-04-09 00:19:31 -070013884 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013885 TEST_REQUIRES_X86_SSE;
13886 for (uint32_t m = 1; m <= 4; m++) {
13887 GemmMicrokernelTester()
13888 .mr(4)
13889 .nr(8)
13890 .kr(1)
13891 .sr(1)
13892 .m(m)
13893 .n(8)
13894 .k(1)
13895 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013896 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013897 }
13898 }
13899
Marat Dukhande06f492020-04-09 00:19:31 -070013900 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013901 TEST_REQUIRES_X86_SSE;
13902 for (uint32_t n = 1; n <= 8; n++) {
13903 GemmMicrokernelTester()
13904 .mr(4)
13905 .nr(8)
13906 .kr(1)
13907 .sr(1)
13908 .m(4)
13909 .n(n)
13910 .k(1)
13911 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013912 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013913 }
13914 }
13915
Marat Dukhande06f492020-04-09 00:19:31 -070013916 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013917 TEST_REQUIRES_X86_SSE;
13918 for (size_t k = 2; k < 10; k++) {
13919 GemmMicrokernelTester()
13920 .mr(4)
13921 .nr(8)
13922 .kr(1)
13923 .sr(1)
13924 .m(4)
13925 .n(8)
13926 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013927 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013928 }
13929 }
13930
Marat Dukhande06f492020-04-09 00:19:31 -070013931 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013932 TEST_REQUIRES_X86_SSE;
13933 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013934 for (uint32_t n = 1; n <= 8; n++) {
13935 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013936 GemmMicrokernelTester()
13937 .mr(4)
13938 .nr(8)
13939 .kr(1)
13940 .sr(1)
13941 .m(m)
13942 .n(n)
13943 .k(k)
13944 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013945 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013946 }
13947 }
13948 }
13949 }
13950
Marat Dukhande06f492020-04-09 00:19:31 -070013951 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013952 TEST_REQUIRES_X86_SSE;
13953 for (uint32_t n = 9; n < 16; n++) {
13954 for (size_t k = 1; k <= 5; k += 2) {
13955 GemmMicrokernelTester()
13956 .mr(4)
13957 .nr(8)
13958 .kr(1)
13959 .sr(1)
13960 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013961 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013962 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013963 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013964 }
13965 }
13966 }
13967
Marat Dukhande06f492020-04-09 00:19:31 -070013968 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013969 TEST_REQUIRES_X86_SSE;
13970 for (uint32_t n = 9; n < 16; n++) {
13971 for (size_t k = 1; k <= 5; k += 2) {
13972 GemmMicrokernelTester()
13973 .mr(4)
13974 .nr(8)
13975 .kr(1)
13976 .sr(1)
13977 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013978 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070013979 .k(k)
13980 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070013981 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070013982 }
13983 }
13984 }
13985
Marat Dukhande06f492020-04-09 00:19:31 -070013986 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070013987 TEST_REQUIRES_X86_SSE;
13988 for (uint32_t n = 9; n < 16; n++) {
13989 for (size_t k = 1; k <= 5; k += 2) {
13990 for (uint32_t m = 1; m <= 4; m++) {
13991 GemmMicrokernelTester()
13992 .mr(4)
13993 .nr(8)
13994 .kr(1)
13995 .sr(1)
13996 .m(m)
13997 .n(n)
13998 .k(k)
13999 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014000 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014001 }
14002 }
14003 }
14004 }
14005
Marat Dukhande06f492020-04-09 00:19:31 -070014006 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014007 TEST_REQUIRES_X86_SSE;
14008 for (uint32_t n = 16; n <= 24; n += 8) {
14009 for (size_t k = 1; k <= 5; k += 2) {
14010 GemmMicrokernelTester()
14011 .mr(4)
14012 .nr(8)
14013 .kr(1)
14014 .sr(1)
14015 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014016 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070014017 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014018 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014019 }
14020 }
14021 }
14022
Marat Dukhande06f492020-04-09 00:19:31 -070014023 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014024 TEST_REQUIRES_X86_SSE;
14025 for (uint32_t n = 16; n <= 24; n += 8) {
14026 for (size_t k = 1; k <= 5; k += 2) {
14027 GemmMicrokernelTester()
14028 .mr(4)
14029 .nr(8)
14030 .kr(1)
14031 .sr(1)
14032 .m(4)
14033 .n(n)
14034 .k(k)
14035 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014036 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014037 }
14038 }
14039 }
14040
Marat Dukhande06f492020-04-09 00:19:31 -070014041 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014042 TEST_REQUIRES_X86_SSE;
14043 for (uint32_t n = 16; n <= 24; n += 8) {
14044 for (size_t k = 1; k <= 5; k += 2) {
14045 for (uint32_t m = 1; m <= 4; m++) {
14046 GemmMicrokernelTester()
14047 .mr(4)
14048 .nr(8)
14049 .kr(1)
14050 .sr(1)
14051 .m(m)
14052 .n(n)
14053 .k(k)
14054 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014055 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014056 }
14057 }
14058 }
14059 }
14060
Marat Dukhande06f492020-04-09 00:19:31 -070014061 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014062 TEST_REQUIRES_X86_SSE;
14063 for (size_t k = 1; k <= 5; k += 2) {
14064 GemmMicrokernelTester()
14065 .mr(4)
14066 .nr(8)
14067 .kr(1)
14068 .sr(1)
14069 .m(4)
14070 .n(8)
14071 .k(k)
14072 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014073 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014074 }
14075 }
14076
Marat Dukhande06f492020-04-09 00:19:31 -070014077 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014078 TEST_REQUIRES_X86_SSE;
14079 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014080 for (uint32_t n = 1; n <= 8; n++) {
14081 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014082 GemmMicrokernelTester()
14083 .mr(4)
14084 .nr(8)
14085 .kr(1)
14086 .sr(1)
14087 .m(m)
14088 .n(n)
14089 .k(k)
14090 .ks(3)
14091 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014092 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014093 }
14094 }
14095 }
14096 }
14097
Marat Dukhande06f492020-04-09 00:19:31 -070014098 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014099 TEST_REQUIRES_X86_SSE;
14100 for (uint32_t n = 9; n < 16; n++) {
14101 for (size_t k = 1; k <= 5; k += 2) {
14102 GemmMicrokernelTester()
14103 .mr(4)
14104 .nr(8)
14105 .kr(1)
14106 .sr(1)
14107 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014108 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070014109 .k(k)
14110 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014111 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014112 }
14113 }
14114 }
14115
Marat Dukhande06f492020-04-09 00:19:31 -070014116 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014117 TEST_REQUIRES_X86_SSE;
14118 for (uint32_t n = 16; n <= 24; n += 8) {
14119 for (size_t k = 1; k <= 5; k += 2) {
14120 GemmMicrokernelTester()
14121 .mr(4)
14122 .nr(8)
14123 .kr(1)
14124 .sr(1)
14125 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014126 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070014127 .k(k)
14128 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014129 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014130 }
14131 }
14132 }
14133
Marat Dukhande06f492020-04-09 00:19:31 -070014134 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014135 TEST_REQUIRES_X86_SSE;
14136 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014137 for (uint32_t n = 1; n <= 8; n++) {
14138 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014139 GemmMicrokernelTester()
14140 .mr(4)
14141 .nr(8)
14142 .kr(1)
14143 .sr(1)
14144 .m(m)
14145 .n(n)
14146 .k(k)
14147 .cm_stride(11)
14148 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014149 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014150 }
14151 }
14152 }
14153 }
14154
Marat Dukhande06f492020-04-09 00:19:31 -070014155 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014156 TEST_REQUIRES_X86_SSE;
14157 for (size_t k = 1; k <= 5; k += 2) {
14158 GemmMicrokernelTester()
14159 .mr(4)
14160 .nr(8)
14161 .kr(1)
14162 .sr(1)
14163 .m(4)
14164 .n(8)
14165 .k(k)
14166 .ks(3)
14167 .a_offset(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014168 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014169 }
14170 }
14171
Marat Dukhande06f492020-04-09 00:19:31 -070014172 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014173 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014174 for (size_t k = 1; k <= 5; k += 2) {
14175 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014176 GemmMicrokernelTester()
14177 .mr(4)
14178 .nr(8)
14179 .kr(1)
14180 .sr(1)
14181 .m(4)
14182 .n(8)
14183 .k(k)
14184 .ks(3)
14185 .a_offset(23)
14186 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014187 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014188 }
14189 }
14190 }
14191
Marat Dukhande06f492020-04-09 00:19:31 -070014192 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014193 TEST_REQUIRES_X86_SSE;
14194 GemmMicrokernelTester()
14195 .mr(4)
14196 .nr(8)
14197 .kr(1)
14198 .sr(1)
14199 .m(4)
14200 .n(8)
14201 .k(1)
14202 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014203 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014204 }
14205
Marat Dukhande06f492020-04-09 00:19:31 -070014206 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014207 TEST_REQUIRES_X86_SSE;
14208 GemmMicrokernelTester()
14209 .mr(4)
14210 .nr(8)
14211 .kr(1)
14212 .sr(1)
14213 .m(4)
14214 .n(8)
14215 .k(1)
14216 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014217 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014218 }
14219
Marat Dukhande06f492020-04-09 00:19:31 -070014220 TEST(F32_IGEMM_MINMAX_4X8__SSE_LOAD1, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070014221 TEST_REQUIRES_X86_SSE;
14222 GemmMicrokernelTester()
14223 .mr(4)
14224 .nr(8)
14225 .kr(1)
14226 .sr(1)
14227 .m(4)
14228 .n(8)
14229 .k(1)
14230 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014231 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070014232 }
14233#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14234
14235
14236#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080014237 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1) {
14238 TEST_REQUIRES_X86_SSE;
14239 GemmMicrokernelTester()
14240 .mr(5)
14241 .nr(8)
14242 .kr(1)
14243 .sr(1)
14244 .m(5)
14245 .n(8)
14246 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014247 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014248 }
14249
14250 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, strided_cn) {
14251 TEST_REQUIRES_X86_SSE;
14252 GemmMicrokernelTester()
14253 .mr(5)
14254 .nr(8)
14255 .kr(1)
14256 .sr(1)
14257 .m(5)
14258 .n(8)
14259 .k(1)
14260 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014261 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014262 }
14263
14264 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile) {
14265 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014266 for (uint32_t n = 1; n <= 8; n++) {
14267 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014268 GemmMicrokernelTester()
14269 .mr(5)
14270 .nr(8)
14271 .kr(1)
14272 .sr(1)
14273 .m(m)
14274 .n(n)
14275 .k(1)
14276 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014277 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014278 }
14279 }
14280 }
14281
14282 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile_m) {
14283 TEST_REQUIRES_X86_SSE;
14284 for (uint32_t m = 1; m <= 5; m++) {
14285 GemmMicrokernelTester()
14286 .mr(5)
14287 .nr(8)
14288 .kr(1)
14289 .sr(1)
14290 .m(m)
14291 .n(8)
14292 .k(1)
14293 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014294 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014295 }
14296 }
14297
14298 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_eq_1_subtile_n) {
14299 TEST_REQUIRES_X86_SSE;
14300 for (uint32_t n = 1; n <= 8; n++) {
14301 GemmMicrokernelTester()
14302 .mr(5)
14303 .nr(8)
14304 .kr(1)
14305 .sr(1)
14306 .m(5)
14307 .n(n)
14308 .k(1)
14309 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014310 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014311 }
14312 }
14313
14314 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_gt_1) {
14315 TEST_REQUIRES_X86_SSE;
14316 for (size_t k = 2; k < 10; k++) {
14317 GemmMicrokernelTester()
14318 .mr(5)
14319 .nr(8)
14320 .kr(1)
14321 .sr(1)
14322 .m(5)
14323 .n(8)
14324 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014325 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014326 }
14327 }
14328
14329 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, k_gt_1_subtile) {
14330 TEST_REQUIRES_X86_SSE;
14331 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014332 for (uint32_t n = 1; n <= 8; n++) {
14333 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014334 GemmMicrokernelTester()
14335 .mr(5)
14336 .nr(8)
14337 .kr(1)
14338 .sr(1)
14339 .m(m)
14340 .n(n)
14341 .k(k)
14342 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014343 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014344 }
14345 }
14346 }
14347 }
14348
14349 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8) {
14350 TEST_REQUIRES_X86_SSE;
14351 for (uint32_t n = 9; n < 16; n++) {
14352 for (size_t k = 1; k <= 5; k += 2) {
14353 GemmMicrokernelTester()
14354 .mr(5)
14355 .nr(8)
14356 .kr(1)
14357 .sr(1)
14358 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014359 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014360 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014361 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014362 }
14363 }
14364 }
14365
14366 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8_strided_cn) {
14367 TEST_REQUIRES_X86_SSE;
14368 for (uint32_t n = 9; n < 16; n++) {
14369 for (size_t k = 1; k <= 5; k += 2) {
14370 GemmMicrokernelTester()
14371 .mr(5)
14372 .nr(8)
14373 .kr(1)
14374 .sr(1)
14375 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014376 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014377 .k(k)
14378 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014379 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014380 }
14381 }
14382 }
14383
14384 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8_subtile) {
14385 TEST_REQUIRES_X86_SSE;
14386 for (uint32_t n = 9; n < 16; n++) {
14387 for (size_t k = 1; k <= 5; k += 2) {
14388 for (uint32_t m = 1; m <= 5; m++) {
14389 GemmMicrokernelTester()
14390 .mr(5)
14391 .nr(8)
14392 .kr(1)
14393 .sr(1)
14394 .m(m)
14395 .n(n)
14396 .k(k)
14397 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014398 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014399 }
14400 }
14401 }
14402 }
14403
14404 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_div_8) {
14405 TEST_REQUIRES_X86_SSE;
14406 for (uint32_t n = 16; n <= 24; n += 8) {
14407 for (size_t k = 1; k <= 5; k += 2) {
14408 GemmMicrokernelTester()
14409 .mr(5)
14410 .nr(8)
14411 .kr(1)
14412 .sr(1)
14413 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014414 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014415 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014416 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014417 }
14418 }
14419 }
14420
14421 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_div_8_strided_cn) {
14422 TEST_REQUIRES_X86_SSE;
14423 for (uint32_t n = 16; n <= 24; n += 8) {
14424 for (size_t k = 1; k <= 5; k += 2) {
14425 GemmMicrokernelTester()
14426 .mr(5)
14427 .nr(8)
14428 .kr(1)
14429 .sr(1)
14430 .m(5)
14431 .n(n)
14432 .k(k)
14433 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014434 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014435 }
14436 }
14437 }
14438
14439 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_div_8_subtile) {
14440 TEST_REQUIRES_X86_SSE;
14441 for (uint32_t n = 16; n <= 24; n += 8) {
14442 for (size_t k = 1; k <= 5; k += 2) {
14443 for (uint32_t m = 1; m <= 5; m++) {
14444 GemmMicrokernelTester()
14445 .mr(5)
14446 .nr(8)
14447 .kr(1)
14448 .sr(1)
14449 .m(m)
14450 .n(n)
14451 .k(k)
14452 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014453 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014454 }
14455 }
14456 }
14457 }
14458
14459 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, small_kernel) {
14460 TEST_REQUIRES_X86_SSE;
14461 for (size_t k = 1; k <= 5; k += 2) {
14462 GemmMicrokernelTester()
14463 .mr(5)
14464 .nr(8)
14465 .kr(1)
14466 .sr(1)
14467 .m(5)
14468 .n(8)
14469 .k(k)
14470 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014471 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014472 }
14473 }
14474
14475 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, small_kernel_subtile) {
14476 TEST_REQUIRES_X86_SSE;
14477 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014478 for (uint32_t n = 1; n <= 8; n++) {
14479 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014480 GemmMicrokernelTester()
14481 .mr(5)
14482 .nr(8)
14483 .kr(1)
14484 .sr(1)
14485 .m(m)
14486 .n(n)
14487 .k(k)
14488 .ks(3)
14489 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014490 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014491 }
14492 }
14493 }
14494 }
14495
14496 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_gt_8_small_kernel) {
14497 TEST_REQUIRES_X86_SSE;
14498 for (uint32_t n = 9; n < 16; n++) {
14499 for (size_t k = 1; k <= 5; k += 2) {
14500 GemmMicrokernelTester()
14501 .mr(5)
14502 .nr(8)
14503 .kr(1)
14504 .sr(1)
14505 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014506 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014507 .k(k)
14508 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014509 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014510 }
14511 }
14512 }
14513
14514 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, n_div_8_small_kernel) {
14515 TEST_REQUIRES_X86_SSE;
14516 for (uint32_t n = 16; n <= 24; n += 8) {
14517 for (size_t k = 1; k <= 5; k += 2) {
14518 GemmMicrokernelTester()
14519 .mr(5)
14520 .nr(8)
14521 .kr(1)
14522 .sr(1)
14523 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014524 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014525 .k(k)
14526 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014527 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014528 }
14529 }
14530 }
14531
14532 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, strided_cm_subtile) {
14533 TEST_REQUIRES_X86_SSE;
14534 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014535 for (uint32_t n = 1; n <= 8; n++) {
14536 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014537 GemmMicrokernelTester()
14538 .mr(5)
14539 .nr(8)
14540 .kr(1)
14541 .sr(1)
14542 .m(m)
14543 .n(n)
14544 .k(k)
14545 .cm_stride(11)
14546 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014547 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014548 }
14549 }
14550 }
14551 }
14552
14553 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, a_offset) {
14554 TEST_REQUIRES_X86_SSE;
14555 for (size_t k = 1; k <= 5; k += 2) {
14556 GemmMicrokernelTester()
14557 .mr(5)
14558 .nr(8)
14559 .kr(1)
14560 .sr(1)
14561 .m(5)
14562 .n(8)
14563 .k(k)
14564 .ks(3)
14565 .a_offset(29)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014566 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014567 }
14568 }
14569
14570 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, zero) {
14571 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014572 for (size_t k = 1; k <= 5; k += 2) {
14573 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014574 GemmMicrokernelTester()
14575 .mr(5)
14576 .nr(8)
14577 .kr(1)
14578 .sr(1)
14579 .m(5)
14580 .n(8)
14581 .k(k)
14582 .ks(3)
14583 .a_offset(29)
14584 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014585 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014586 }
14587 }
14588 }
14589
14590 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, qmin) {
14591 TEST_REQUIRES_X86_SSE;
14592 GemmMicrokernelTester()
14593 .mr(5)
14594 .nr(8)
14595 .kr(1)
14596 .sr(1)
14597 .m(5)
14598 .n(8)
14599 .k(1)
14600 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014601 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014602 }
14603
14604 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, qmax) {
14605 TEST_REQUIRES_X86_SSE;
14606 GemmMicrokernelTester()
14607 .mr(5)
14608 .nr(8)
14609 .kr(1)
14610 .sr(1)
14611 .m(5)
14612 .n(8)
14613 .k(1)
14614 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014615 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014616 }
14617
14618 TEST(F32_IGEMM_MINMAX_5X8__SSE_LOAD1, strided_cm) {
14619 TEST_REQUIRES_X86_SSE;
14620 GemmMicrokernelTester()
14621 .mr(5)
14622 .nr(8)
14623 .kr(1)
14624 .sr(1)
14625 .m(5)
14626 .n(8)
14627 .k(1)
14628 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014629 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014630 }
14631#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14632
14633
14634#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080014635 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_eq_4) {
14636 TEST_REQUIRES_X86_SSE;
14637 GemmMicrokernelTester()
14638 .mr(3)
14639 .nr(8)
14640 .kr(1)
14641 .sr(1)
14642 .m(3)
14643 .n(8)
14644 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014645 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014646 }
14647
14648 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, strided_cn) {
14649 TEST_REQUIRES_X86_SSE;
14650 GemmMicrokernelTester()
14651 .mr(3)
14652 .nr(8)
14653 .kr(1)
14654 .sr(1)
14655 .m(3)
14656 .n(8)
14657 .k(4)
14658 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014659 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014660 }
14661
14662 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_eq_4_subtile) {
14663 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014664 for (uint32_t n = 1; n <= 8; n++) {
14665 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014666 GemmMicrokernelTester()
14667 .mr(3)
14668 .nr(8)
14669 .kr(1)
14670 .sr(1)
14671 .m(m)
14672 .n(n)
14673 .k(4)
14674 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014675 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014676 }
14677 }
14678 }
14679
14680 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_eq_4_subtile_m) {
14681 TEST_REQUIRES_X86_SSE;
14682 for (uint32_t m = 1; m <= 3; m++) {
14683 GemmMicrokernelTester()
14684 .mr(3)
14685 .nr(8)
14686 .kr(1)
14687 .sr(1)
14688 .m(m)
14689 .n(8)
14690 .k(4)
14691 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014692 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014693 }
14694 }
14695
14696 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_eq_4_subtile_n) {
14697 TEST_REQUIRES_X86_SSE;
14698 for (uint32_t n = 1; n <= 8; n++) {
14699 GemmMicrokernelTester()
14700 .mr(3)
14701 .nr(8)
14702 .kr(1)
14703 .sr(1)
14704 .m(3)
14705 .n(n)
14706 .k(4)
14707 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014708 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014709 }
14710 }
14711
14712 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_lt_4) {
14713 TEST_REQUIRES_X86_SSE;
14714 for (size_t k = 1; k < 4; k++) {
14715 GemmMicrokernelTester()
14716 .mr(3)
14717 .nr(8)
14718 .kr(1)
14719 .sr(1)
14720 .m(3)
14721 .n(8)
14722 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014723 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014724 }
14725 }
14726
14727 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_lt_4_subtile) {
14728 TEST_REQUIRES_X86_SSE;
14729 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014730 for (uint32_t n = 1; n <= 8; n++) {
14731 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014732 GemmMicrokernelTester()
14733 .mr(3)
14734 .nr(8)
14735 .kr(1)
14736 .sr(1)
14737 .m(m)
14738 .n(n)
14739 .k(k)
14740 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014741 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014742 }
14743 }
14744 }
14745 }
14746
14747 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_gt_4) {
14748 TEST_REQUIRES_X86_SSE;
14749 for (size_t k = 5; k < 8; k++) {
14750 GemmMicrokernelTester()
14751 .mr(3)
14752 .nr(8)
14753 .kr(1)
14754 .sr(1)
14755 .m(3)
14756 .n(8)
14757 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014758 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014759 }
14760 }
14761
14762 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_gt_4_subtile) {
14763 TEST_REQUIRES_X86_SSE;
14764 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014765 for (uint32_t n = 1; n <= 8; n++) {
14766 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014767 GemmMicrokernelTester()
14768 .mr(3)
14769 .nr(8)
14770 .kr(1)
14771 .sr(1)
14772 .m(m)
14773 .n(n)
14774 .k(k)
14775 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014776 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014777 }
14778 }
14779 }
14780 }
14781
14782 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_div_4) {
14783 TEST_REQUIRES_X86_SSE;
14784 for (size_t k = 8; k <= 40; k += 4) {
14785 GemmMicrokernelTester()
14786 .mr(3)
14787 .nr(8)
14788 .kr(1)
14789 .sr(1)
14790 .m(3)
14791 .n(8)
14792 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014793 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014794 }
14795 }
14796
14797 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, k_div_4_subtile) {
14798 TEST_REQUIRES_X86_SSE;
14799 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014800 for (uint32_t n = 1; n <= 8; n++) {
14801 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014802 GemmMicrokernelTester()
14803 .mr(3)
14804 .nr(8)
14805 .kr(1)
14806 .sr(1)
14807 .m(m)
14808 .n(n)
14809 .k(k)
14810 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014811 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014812 }
14813 }
14814 }
14815 }
14816
14817 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_gt_8) {
14818 TEST_REQUIRES_X86_SSE;
14819 for (uint32_t n = 9; n < 16; n++) {
14820 for (size_t k = 1; k <= 20; k += 5) {
14821 GemmMicrokernelTester()
14822 .mr(3)
14823 .nr(8)
14824 .kr(1)
14825 .sr(1)
14826 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014827 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014828 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014829 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014830 }
14831 }
14832 }
14833
14834 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_gt_8_strided_cn) {
14835 TEST_REQUIRES_X86_SSE;
14836 for (uint32_t n = 9; n < 16; n++) {
14837 for (size_t k = 1; k <= 20; k += 5) {
14838 GemmMicrokernelTester()
14839 .mr(3)
14840 .nr(8)
14841 .kr(1)
14842 .sr(1)
14843 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014844 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014845 .k(k)
14846 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014847 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014848 }
14849 }
14850 }
14851
14852 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_gt_8_subtile) {
14853 TEST_REQUIRES_X86_SSE;
14854 for (uint32_t n = 9; n < 16; n++) {
14855 for (size_t k = 1; k <= 20; k += 5) {
14856 for (uint32_t m = 1; m <= 3; m++) {
14857 GemmMicrokernelTester()
14858 .mr(3)
14859 .nr(8)
14860 .kr(1)
14861 .sr(1)
14862 .m(m)
14863 .n(n)
14864 .k(k)
14865 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014866 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014867 }
14868 }
14869 }
14870 }
14871
14872 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_div_8) {
14873 TEST_REQUIRES_X86_SSE;
14874 for (uint32_t n = 16; n <= 24; n += 8) {
14875 for (size_t k = 1; k <= 20; k += 5) {
14876 GemmMicrokernelTester()
14877 .mr(3)
14878 .nr(8)
14879 .kr(1)
14880 .sr(1)
14881 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014882 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014883 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014884 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014885 }
14886 }
14887 }
14888
14889 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_div_8_strided_cn) {
14890 TEST_REQUIRES_X86_SSE;
14891 for (uint32_t n = 16; n <= 24; n += 8) {
14892 for (size_t k = 1; k <= 20; k += 5) {
14893 GemmMicrokernelTester()
14894 .mr(3)
14895 .nr(8)
14896 .kr(1)
14897 .sr(1)
14898 .m(3)
14899 .n(n)
14900 .k(k)
14901 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014902 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014903 }
14904 }
14905 }
14906
14907 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_div_8_subtile) {
14908 TEST_REQUIRES_X86_SSE;
14909 for (uint32_t n = 16; n <= 24; n += 8) {
14910 for (size_t k = 1; k <= 20; k += 5) {
14911 for (uint32_t m = 1; m <= 3; m++) {
14912 GemmMicrokernelTester()
14913 .mr(3)
14914 .nr(8)
14915 .kr(1)
14916 .sr(1)
14917 .m(m)
14918 .n(n)
14919 .k(k)
14920 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014921 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014922 }
14923 }
14924 }
14925 }
14926
14927 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, small_kernel) {
14928 TEST_REQUIRES_X86_SSE;
14929 for (size_t k = 1; k <= 20; k += 5) {
14930 GemmMicrokernelTester()
14931 .mr(3)
14932 .nr(8)
14933 .kr(1)
14934 .sr(1)
14935 .m(3)
14936 .n(8)
14937 .k(k)
14938 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014939 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014940 }
14941 }
14942
14943 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, small_kernel_subtile) {
14944 TEST_REQUIRES_X86_SSE;
14945 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014946 for (uint32_t n = 1; n <= 8; n++) {
14947 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080014948 GemmMicrokernelTester()
14949 .mr(3)
14950 .nr(8)
14951 .kr(1)
14952 .sr(1)
14953 .m(m)
14954 .n(n)
14955 .k(k)
14956 .ks(3)
14957 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014958 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014959 }
14960 }
14961 }
14962 }
14963
14964 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_gt_8_small_kernel) {
14965 TEST_REQUIRES_X86_SSE;
14966 for (uint32_t n = 9; n < 16; n++) {
14967 for (size_t k = 1; k <= 20; k += 5) {
14968 GemmMicrokernelTester()
14969 .mr(3)
14970 .nr(8)
14971 .kr(1)
14972 .sr(1)
14973 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014974 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014975 .k(k)
14976 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014977 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014978 }
14979 }
14980 }
14981
14982 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, n_div_8_small_kernel) {
14983 TEST_REQUIRES_X86_SSE;
14984 for (uint32_t n = 16; n <= 24; n += 8) {
14985 for (size_t k = 1; k <= 20; k += 5) {
14986 GemmMicrokernelTester()
14987 .mr(3)
14988 .nr(8)
14989 .kr(1)
14990 .sr(1)
14991 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014992 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080014993 .k(k)
14994 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070014995 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080014996 }
14997 }
14998 }
14999
15000 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, strided_cm_subtile) {
15001 TEST_REQUIRES_X86_SSE;
15002 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015003 for (uint32_t n = 1; n <= 8; n++) {
15004 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015005 GemmMicrokernelTester()
15006 .mr(3)
15007 .nr(8)
15008 .kr(1)
15009 .sr(1)
15010 .m(m)
15011 .n(n)
15012 .k(k)
15013 .cm_stride(11)
15014 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015015 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015016 }
15017 }
15018 }
15019 }
15020
15021 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, a_offset) {
15022 TEST_REQUIRES_X86_SSE;
15023 for (size_t k = 1; k <= 20; k += 5) {
15024 GemmMicrokernelTester()
15025 .mr(3)
15026 .nr(8)
15027 .kr(1)
15028 .sr(1)
15029 .m(3)
15030 .n(8)
15031 .k(k)
15032 .ks(3)
15033 .a_offset(67)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015034 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015035 }
15036 }
15037
15038 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, zero) {
15039 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015040 for (size_t k = 1; k <= 20; k += 5) {
15041 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015042 GemmMicrokernelTester()
15043 .mr(3)
15044 .nr(8)
15045 .kr(1)
15046 .sr(1)
15047 .m(3)
15048 .n(8)
15049 .k(k)
15050 .ks(3)
15051 .a_offset(67)
15052 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015053 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015054 }
15055 }
15056 }
15057
15058 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, qmin) {
15059 TEST_REQUIRES_X86_SSE;
15060 GemmMicrokernelTester()
15061 .mr(3)
15062 .nr(8)
15063 .kr(1)
15064 .sr(1)
15065 .m(3)
15066 .n(8)
15067 .k(4)
15068 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015069 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015070 }
15071
15072 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, qmax) {
15073 TEST_REQUIRES_X86_SSE;
15074 GemmMicrokernelTester()
15075 .mr(3)
15076 .nr(8)
15077 .kr(1)
15078 .sr(1)
15079 .m(3)
15080 .n(8)
15081 .k(4)
15082 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015083 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015084 }
15085
15086 TEST(F32_IGEMM_MINMAX_3X8__SSE_DUP, strided_cm) {
15087 TEST_REQUIRES_X86_SSE;
15088 GemmMicrokernelTester()
15089 .mr(3)
15090 .nr(8)
15091 .kr(1)
15092 .sr(1)
15093 .m(3)
15094 .n(8)
15095 .k(4)
15096 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015097 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015098 }
15099#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15100
15101
15102#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080015103 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_eq_4) {
15104 TEST_REQUIRES_X86_SSE;
15105 GemmMicrokernelTester()
15106 .mr(5)
15107 .nr(8)
15108 .kr(1)
15109 .sr(1)
15110 .m(5)
15111 .n(8)
15112 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015113 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015114 }
15115
15116 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, strided_cn) {
15117 TEST_REQUIRES_X86_SSE;
15118 GemmMicrokernelTester()
15119 .mr(5)
15120 .nr(8)
15121 .kr(1)
15122 .sr(1)
15123 .m(5)
15124 .n(8)
15125 .k(4)
15126 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015127 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015128 }
15129
15130 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_eq_4_subtile) {
15131 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015132 for (uint32_t n = 1; n <= 8; n++) {
15133 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015134 GemmMicrokernelTester()
15135 .mr(5)
15136 .nr(8)
15137 .kr(1)
15138 .sr(1)
15139 .m(m)
15140 .n(n)
15141 .k(4)
15142 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015143 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015144 }
15145 }
15146 }
15147
15148 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_eq_4_subtile_m) {
15149 TEST_REQUIRES_X86_SSE;
15150 for (uint32_t m = 1; m <= 5; m++) {
15151 GemmMicrokernelTester()
15152 .mr(5)
15153 .nr(8)
15154 .kr(1)
15155 .sr(1)
15156 .m(m)
15157 .n(8)
15158 .k(4)
15159 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015160 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015161 }
15162 }
15163
15164 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_eq_4_subtile_n) {
15165 TEST_REQUIRES_X86_SSE;
15166 for (uint32_t n = 1; n <= 8; n++) {
15167 GemmMicrokernelTester()
15168 .mr(5)
15169 .nr(8)
15170 .kr(1)
15171 .sr(1)
15172 .m(5)
15173 .n(n)
15174 .k(4)
15175 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015176 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015177 }
15178 }
15179
15180 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_lt_4) {
15181 TEST_REQUIRES_X86_SSE;
15182 for (size_t k = 1; k < 4; k++) {
15183 GemmMicrokernelTester()
15184 .mr(5)
15185 .nr(8)
15186 .kr(1)
15187 .sr(1)
15188 .m(5)
15189 .n(8)
15190 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015191 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015192 }
15193 }
15194
15195 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_lt_4_subtile) {
15196 TEST_REQUIRES_X86_SSE;
15197 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015198 for (uint32_t n = 1; n <= 8; n++) {
15199 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015200 GemmMicrokernelTester()
15201 .mr(5)
15202 .nr(8)
15203 .kr(1)
15204 .sr(1)
15205 .m(m)
15206 .n(n)
15207 .k(k)
15208 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015209 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015210 }
15211 }
15212 }
15213 }
15214
15215 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_gt_4) {
15216 TEST_REQUIRES_X86_SSE;
15217 for (size_t k = 5; k < 8; k++) {
15218 GemmMicrokernelTester()
15219 .mr(5)
15220 .nr(8)
15221 .kr(1)
15222 .sr(1)
15223 .m(5)
15224 .n(8)
15225 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015226 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015227 }
15228 }
15229
15230 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_gt_4_subtile) {
15231 TEST_REQUIRES_X86_SSE;
15232 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015233 for (uint32_t n = 1; n <= 8; n++) {
15234 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015235 GemmMicrokernelTester()
15236 .mr(5)
15237 .nr(8)
15238 .kr(1)
15239 .sr(1)
15240 .m(m)
15241 .n(n)
15242 .k(k)
15243 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015244 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015245 }
15246 }
15247 }
15248 }
15249
15250 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_div_4) {
15251 TEST_REQUIRES_X86_SSE;
15252 for (size_t k = 8; k <= 40; k += 4) {
15253 GemmMicrokernelTester()
15254 .mr(5)
15255 .nr(8)
15256 .kr(1)
15257 .sr(1)
15258 .m(5)
15259 .n(8)
15260 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015261 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015262 }
15263 }
15264
15265 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, k_div_4_subtile) {
15266 TEST_REQUIRES_X86_SSE;
15267 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015268 for (uint32_t n = 1; n <= 8; n++) {
15269 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015270 GemmMicrokernelTester()
15271 .mr(5)
15272 .nr(8)
15273 .kr(1)
15274 .sr(1)
15275 .m(m)
15276 .n(n)
15277 .k(k)
15278 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015279 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015280 }
15281 }
15282 }
15283 }
15284
15285 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_gt_8) {
15286 TEST_REQUIRES_X86_SSE;
15287 for (uint32_t n = 9; n < 16; n++) {
15288 for (size_t k = 1; k <= 20; k += 5) {
15289 GemmMicrokernelTester()
15290 .mr(5)
15291 .nr(8)
15292 .kr(1)
15293 .sr(1)
15294 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015295 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015296 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015297 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015298 }
15299 }
15300 }
15301
15302 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_gt_8_strided_cn) {
15303 TEST_REQUIRES_X86_SSE;
15304 for (uint32_t n = 9; n < 16; n++) {
15305 for (size_t k = 1; k <= 20; k += 5) {
15306 GemmMicrokernelTester()
15307 .mr(5)
15308 .nr(8)
15309 .kr(1)
15310 .sr(1)
15311 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015312 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015313 .k(k)
15314 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015315 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015316 }
15317 }
15318 }
15319
15320 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_gt_8_subtile) {
15321 TEST_REQUIRES_X86_SSE;
15322 for (uint32_t n = 9; n < 16; n++) {
15323 for (size_t k = 1; k <= 20; k += 5) {
15324 for (uint32_t m = 1; m <= 5; m++) {
15325 GemmMicrokernelTester()
15326 .mr(5)
15327 .nr(8)
15328 .kr(1)
15329 .sr(1)
15330 .m(m)
15331 .n(n)
15332 .k(k)
15333 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015334 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015335 }
15336 }
15337 }
15338 }
15339
15340 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_div_8) {
15341 TEST_REQUIRES_X86_SSE;
15342 for (uint32_t n = 16; n <= 24; n += 8) {
15343 for (size_t k = 1; k <= 20; k += 5) {
15344 GemmMicrokernelTester()
15345 .mr(5)
15346 .nr(8)
15347 .kr(1)
15348 .sr(1)
15349 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015350 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015351 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015352 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015353 }
15354 }
15355 }
15356
15357 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_div_8_strided_cn) {
15358 TEST_REQUIRES_X86_SSE;
15359 for (uint32_t n = 16; n <= 24; n += 8) {
15360 for (size_t k = 1; k <= 20; k += 5) {
15361 GemmMicrokernelTester()
15362 .mr(5)
15363 .nr(8)
15364 .kr(1)
15365 .sr(1)
15366 .m(5)
15367 .n(n)
15368 .k(k)
15369 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015370 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015371 }
15372 }
15373 }
15374
15375 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_div_8_subtile) {
15376 TEST_REQUIRES_X86_SSE;
15377 for (uint32_t n = 16; n <= 24; n += 8) {
15378 for (size_t k = 1; k <= 20; k += 5) {
15379 for (uint32_t m = 1; m <= 5; m++) {
15380 GemmMicrokernelTester()
15381 .mr(5)
15382 .nr(8)
15383 .kr(1)
15384 .sr(1)
15385 .m(m)
15386 .n(n)
15387 .k(k)
15388 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015389 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015390 }
15391 }
15392 }
15393 }
15394
15395 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, small_kernel) {
15396 TEST_REQUIRES_X86_SSE;
15397 for (size_t k = 1; k <= 20; k += 5) {
15398 GemmMicrokernelTester()
15399 .mr(5)
15400 .nr(8)
15401 .kr(1)
15402 .sr(1)
15403 .m(5)
15404 .n(8)
15405 .k(k)
15406 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015407 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015408 }
15409 }
15410
15411 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, small_kernel_subtile) {
15412 TEST_REQUIRES_X86_SSE;
15413 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015414 for (uint32_t n = 1; n <= 8; n++) {
15415 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015416 GemmMicrokernelTester()
15417 .mr(5)
15418 .nr(8)
15419 .kr(1)
15420 .sr(1)
15421 .m(m)
15422 .n(n)
15423 .k(k)
15424 .ks(3)
15425 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015426 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015427 }
15428 }
15429 }
15430 }
15431
15432 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_gt_8_small_kernel) {
15433 TEST_REQUIRES_X86_SSE;
15434 for (uint32_t n = 9; n < 16; n++) {
15435 for (size_t k = 1; k <= 20; k += 5) {
15436 GemmMicrokernelTester()
15437 .mr(5)
15438 .nr(8)
15439 .kr(1)
15440 .sr(1)
15441 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015442 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015443 .k(k)
15444 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015445 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015446 }
15447 }
15448 }
15449
15450 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, n_div_8_small_kernel) {
15451 TEST_REQUIRES_X86_SSE;
15452 for (uint32_t n = 16; n <= 24; n += 8) {
15453 for (size_t k = 1; k <= 20; k += 5) {
15454 GemmMicrokernelTester()
15455 .mr(5)
15456 .nr(8)
15457 .kr(1)
15458 .sr(1)
15459 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015460 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015461 .k(k)
15462 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015463 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015464 }
15465 }
15466 }
15467
15468 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, strided_cm_subtile) {
15469 TEST_REQUIRES_X86_SSE;
15470 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015471 for (uint32_t n = 1; n <= 8; n++) {
15472 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015473 GemmMicrokernelTester()
15474 .mr(5)
15475 .nr(8)
15476 .kr(1)
15477 .sr(1)
15478 .m(m)
15479 .n(n)
15480 .k(k)
15481 .cm_stride(11)
15482 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015483 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015484 }
15485 }
15486 }
15487 }
15488
15489 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, a_offset) {
15490 TEST_REQUIRES_X86_SSE;
15491 for (size_t k = 1; k <= 20; k += 5) {
15492 GemmMicrokernelTester()
15493 .mr(5)
15494 .nr(8)
15495 .kr(1)
15496 .sr(1)
15497 .m(5)
15498 .n(8)
15499 .k(k)
15500 .ks(3)
15501 .a_offset(103)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015502 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015503 }
15504 }
15505
15506 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, zero) {
15507 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015508 for (size_t k = 1; k <= 20; k += 5) {
15509 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015510 GemmMicrokernelTester()
15511 .mr(5)
15512 .nr(8)
15513 .kr(1)
15514 .sr(1)
15515 .m(5)
15516 .n(8)
15517 .k(k)
15518 .ks(3)
15519 .a_offset(103)
15520 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015521 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015522 }
15523 }
15524 }
15525
15526 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, qmin) {
15527 TEST_REQUIRES_X86_SSE;
15528 GemmMicrokernelTester()
15529 .mr(5)
15530 .nr(8)
15531 .kr(1)
15532 .sr(1)
15533 .m(5)
15534 .n(8)
15535 .k(4)
15536 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015537 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015538 }
15539
15540 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, qmax) {
15541 TEST_REQUIRES_X86_SSE;
15542 GemmMicrokernelTester()
15543 .mr(5)
15544 .nr(8)
15545 .kr(1)
15546 .sr(1)
15547 .m(5)
15548 .n(8)
15549 .k(4)
15550 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015551 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015552 }
15553
15554 TEST(F32_IGEMM_MINMAX_5X8__SSE_DUP, strided_cm) {
15555 TEST_REQUIRES_X86_SSE;
15556 GemmMicrokernelTester()
15557 .mr(5)
15558 .nr(8)
15559 .kr(1)
15560 .sr(1)
15561 .m(5)
15562 .n(8)
15563 .k(4)
15564 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015565 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015566 }
15567#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15568
15569
15570#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080015571 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_eq_4) {
15572 TEST_REQUIRES_X86_SSE;
15573 GemmMicrokernelTester()
15574 .mr(3)
15575 .nr(8)
15576 .kr(1)
15577 .sr(4)
15578 .m(3)
15579 .n(8)
15580 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015581 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015582 }
15583
15584 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, strided_cn) {
15585 TEST_REQUIRES_X86_SSE;
15586 GemmMicrokernelTester()
15587 .mr(3)
15588 .nr(8)
15589 .kr(1)
15590 .sr(4)
15591 .m(3)
15592 .n(8)
15593 .k(4)
15594 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015595 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015596 }
15597
15598 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_eq_4_subtile) {
15599 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015600 for (uint32_t n = 1; n <= 8; n++) {
15601 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015602 GemmMicrokernelTester()
15603 .mr(3)
15604 .nr(8)
15605 .kr(1)
15606 .sr(4)
15607 .m(m)
15608 .n(n)
15609 .k(4)
15610 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015611 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015612 }
15613 }
15614 }
15615
15616 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_eq_4_subtile_m) {
15617 TEST_REQUIRES_X86_SSE;
15618 for (uint32_t m = 1; m <= 3; m++) {
15619 GemmMicrokernelTester()
15620 .mr(3)
15621 .nr(8)
15622 .kr(1)
15623 .sr(4)
15624 .m(m)
15625 .n(8)
15626 .k(4)
15627 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015628 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015629 }
15630 }
15631
15632 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_eq_4_subtile_n) {
15633 TEST_REQUIRES_X86_SSE;
15634 for (uint32_t n = 1; n <= 8; n++) {
15635 GemmMicrokernelTester()
15636 .mr(3)
15637 .nr(8)
15638 .kr(1)
15639 .sr(4)
15640 .m(3)
15641 .n(n)
15642 .k(4)
15643 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015644 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015645 }
15646 }
15647
15648 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_lt_4) {
15649 TEST_REQUIRES_X86_SSE;
15650 for (size_t k = 1; k < 4; k++) {
15651 GemmMicrokernelTester()
15652 .mr(3)
15653 .nr(8)
15654 .kr(1)
15655 .sr(4)
15656 .m(3)
15657 .n(8)
15658 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015659 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015660 }
15661 }
15662
15663 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_lt_4_subtile) {
15664 TEST_REQUIRES_X86_SSE;
15665 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015666 for (uint32_t n = 1; n <= 8; n++) {
15667 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015668 GemmMicrokernelTester()
15669 .mr(3)
15670 .nr(8)
15671 .kr(1)
15672 .sr(4)
15673 .m(m)
15674 .n(n)
15675 .k(k)
15676 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015677 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015678 }
15679 }
15680 }
15681 }
15682
15683 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_gt_4) {
15684 TEST_REQUIRES_X86_SSE;
15685 for (size_t k = 5; k < 8; k++) {
15686 GemmMicrokernelTester()
15687 .mr(3)
15688 .nr(8)
15689 .kr(1)
15690 .sr(4)
15691 .m(3)
15692 .n(8)
15693 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015694 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015695 }
15696 }
15697
15698 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_gt_4_subtile) {
15699 TEST_REQUIRES_X86_SSE;
15700 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015701 for (uint32_t n = 1; n <= 8; n++) {
15702 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015703 GemmMicrokernelTester()
15704 .mr(3)
15705 .nr(8)
15706 .kr(1)
15707 .sr(4)
15708 .m(m)
15709 .n(n)
15710 .k(k)
15711 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015712 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015713 }
15714 }
15715 }
15716 }
15717
15718 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_div_4) {
15719 TEST_REQUIRES_X86_SSE;
15720 for (size_t k = 8; k <= 40; k += 4) {
15721 GemmMicrokernelTester()
15722 .mr(3)
15723 .nr(8)
15724 .kr(1)
15725 .sr(4)
15726 .m(3)
15727 .n(8)
15728 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015729 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015730 }
15731 }
15732
15733 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, k_div_4_subtile) {
15734 TEST_REQUIRES_X86_SSE;
15735 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015736 for (uint32_t n = 1; n <= 8; n++) {
15737 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015738 GemmMicrokernelTester()
15739 .mr(3)
15740 .nr(8)
15741 .kr(1)
15742 .sr(4)
15743 .m(m)
15744 .n(n)
15745 .k(k)
15746 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015747 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015748 }
15749 }
15750 }
15751 }
15752
15753 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_gt_8) {
15754 TEST_REQUIRES_X86_SSE;
15755 for (uint32_t n = 9; n < 16; n++) {
15756 for (size_t k = 1; k <= 20; k += 5) {
15757 GemmMicrokernelTester()
15758 .mr(3)
15759 .nr(8)
15760 .kr(1)
15761 .sr(4)
15762 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015763 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015764 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015765 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015766 }
15767 }
15768 }
15769
15770 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_gt_8_strided_cn) {
15771 TEST_REQUIRES_X86_SSE;
15772 for (uint32_t n = 9; n < 16; n++) {
15773 for (size_t k = 1; k <= 20; k += 5) {
15774 GemmMicrokernelTester()
15775 .mr(3)
15776 .nr(8)
15777 .kr(1)
15778 .sr(4)
15779 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015780 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015781 .k(k)
15782 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015783 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015784 }
15785 }
15786 }
15787
15788 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_gt_8_subtile) {
15789 TEST_REQUIRES_X86_SSE;
15790 for (uint32_t n = 9; n < 16; n++) {
15791 for (size_t k = 1; k <= 20; k += 5) {
15792 for (uint32_t m = 1; m <= 3; m++) {
15793 GemmMicrokernelTester()
15794 .mr(3)
15795 .nr(8)
15796 .kr(1)
15797 .sr(4)
15798 .m(m)
15799 .n(n)
15800 .k(k)
15801 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015802 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015803 }
15804 }
15805 }
15806 }
15807
15808 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_div_8) {
15809 TEST_REQUIRES_X86_SSE;
15810 for (uint32_t n = 16; n <= 24; n += 8) {
15811 for (size_t k = 1; k <= 20; k += 5) {
15812 GemmMicrokernelTester()
15813 .mr(3)
15814 .nr(8)
15815 .kr(1)
15816 .sr(4)
15817 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015818 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015819 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015820 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015821 }
15822 }
15823 }
15824
15825 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_div_8_strided_cn) {
15826 TEST_REQUIRES_X86_SSE;
15827 for (uint32_t n = 16; n <= 24; n += 8) {
15828 for (size_t k = 1; k <= 20; k += 5) {
15829 GemmMicrokernelTester()
15830 .mr(3)
15831 .nr(8)
15832 .kr(1)
15833 .sr(4)
15834 .m(3)
15835 .n(n)
15836 .k(k)
15837 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015838 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015839 }
15840 }
15841 }
15842
15843 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_div_8_subtile) {
15844 TEST_REQUIRES_X86_SSE;
15845 for (uint32_t n = 16; n <= 24; n += 8) {
15846 for (size_t k = 1; k <= 20; k += 5) {
15847 for (uint32_t m = 1; m <= 3; m++) {
15848 GemmMicrokernelTester()
15849 .mr(3)
15850 .nr(8)
15851 .kr(1)
15852 .sr(4)
15853 .m(m)
15854 .n(n)
15855 .k(k)
15856 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015857 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015858 }
15859 }
15860 }
15861 }
15862
15863 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, small_kernel) {
15864 TEST_REQUIRES_X86_SSE;
15865 for (size_t k = 1; k <= 20; k += 5) {
15866 GemmMicrokernelTester()
15867 .mr(3)
15868 .nr(8)
15869 .kr(1)
15870 .sr(4)
15871 .m(3)
15872 .n(8)
15873 .k(k)
15874 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015875 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015876 }
15877 }
15878
15879 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, small_kernel_subtile) {
15880 TEST_REQUIRES_X86_SSE;
15881 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015882 for (uint32_t n = 1; n <= 8; n++) {
15883 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015884 GemmMicrokernelTester()
15885 .mr(3)
15886 .nr(8)
15887 .kr(1)
15888 .sr(4)
15889 .m(m)
15890 .n(n)
15891 .k(k)
15892 .ks(3)
15893 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015894 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015895 }
15896 }
15897 }
15898 }
15899
15900 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_gt_8_small_kernel) {
15901 TEST_REQUIRES_X86_SSE;
15902 for (uint32_t n = 9; n < 16; n++) {
15903 for (size_t k = 1; k <= 20; k += 5) {
15904 GemmMicrokernelTester()
15905 .mr(3)
15906 .nr(8)
15907 .kr(1)
15908 .sr(4)
15909 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015910 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015911 .k(k)
15912 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015913 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015914 }
15915 }
15916 }
15917
15918 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, n_div_8_small_kernel) {
15919 TEST_REQUIRES_X86_SSE;
15920 for (uint32_t n = 16; n <= 24; n += 8) {
15921 for (size_t k = 1; k <= 20; k += 5) {
15922 GemmMicrokernelTester()
15923 .mr(3)
15924 .nr(8)
15925 .kr(1)
15926 .sr(4)
15927 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015928 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080015929 .k(k)
15930 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015931 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015932 }
15933 }
15934 }
15935
15936 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, strided_cm_subtile) {
15937 TEST_REQUIRES_X86_SSE;
15938 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015939 for (uint32_t n = 1; n <= 8; n++) {
15940 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015941 GemmMicrokernelTester()
15942 .mr(3)
15943 .nr(8)
15944 .kr(1)
15945 .sr(4)
15946 .m(m)
15947 .n(n)
15948 .k(k)
15949 .cm_stride(11)
15950 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015951 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015952 }
15953 }
15954 }
15955 }
15956
15957 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, a_offset) {
15958 TEST_REQUIRES_X86_SSE;
15959 for (size_t k = 1; k <= 20; k += 5) {
15960 GemmMicrokernelTester()
15961 .mr(3)
15962 .nr(8)
15963 .kr(1)
15964 .sr(4)
15965 .m(3)
15966 .n(8)
15967 .k(k)
15968 .ks(3)
15969 .a_offset(67)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015970 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015971 }
15972 }
15973
15974 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, zero) {
15975 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015976 for (size_t k = 1; k <= 20; k += 5) {
15977 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080015978 GemmMicrokernelTester()
15979 .mr(3)
15980 .nr(8)
15981 .kr(1)
15982 .sr(4)
15983 .m(3)
15984 .n(8)
15985 .k(k)
15986 .ks(3)
15987 .a_offset(67)
15988 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070015989 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080015990 }
15991 }
15992 }
15993
15994 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, qmin) {
15995 TEST_REQUIRES_X86_SSE;
15996 GemmMicrokernelTester()
15997 .mr(3)
15998 .nr(8)
15999 .kr(1)
16000 .sr(4)
16001 .m(3)
16002 .n(8)
16003 .k(4)
16004 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016005 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016006 }
16007
16008 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, qmax) {
16009 TEST_REQUIRES_X86_SSE;
16010 GemmMicrokernelTester()
16011 .mr(3)
16012 .nr(8)
16013 .kr(1)
16014 .sr(4)
16015 .m(3)
16016 .n(8)
16017 .k(4)
16018 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016019 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016020 }
16021
16022 TEST(F32_IGEMM_MINMAX_3X8S4__SSE, strided_cm) {
16023 TEST_REQUIRES_X86_SSE;
16024 GemmMicrokernelTester()
16025 .mr(3)
16026 .nr(8)
16027 .kr(1)
16028 .sr(4)
16029 .m(3)
16030 .n(8)
16031 .k(4)
16032 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016033 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__sse, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016034 }
16035#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16036
16037
16038#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080016039 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_eq_4) {
16040 TEST_REQUIRES_X86_SSE2;
16041 GemmMicrokernelTester()
16042 .mr(1)
16043 .nr(8)
16044 .kr(1)
16045 .sr(1)
16046 .m(1)
16047 .n(8)
16048 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016049 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016050 }
16051
16052 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, strided_cn) {
16053 TEST_REQUIRES_X86_SSE2;
16054 GemmMicrokernelTester()
16055 .mr(1)
16056 .nr(8)
16057 .kr(1)
16058 .sr(1)
16059 .m(1)
16060 .n(8)
16061 .k(4)
16062 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016063 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016064 }
16065
16066 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile) {
16067 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016068 for (uint32_t n = 1; n <= 8; n++) {
16069 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016070 GemmMicrokernelTester()
16071 .mr(1)
16072 .nr(8)
16073 .kr(1)
16074 .sr(1)
16075 .m(m)
16076 .n(n)
16077 .k(4)
16078 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016079 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016080 }
16081 }
16082 }
16083
16084 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile_m) {
16085 TEST_REQUIRES_X86_SSE2;
16086 for (uint32_t m = 1; m <= 1; m++) {
16087 GemmMicrokernelTester()
16088 .mr(1)
16089 .nr(8)
16090 .kr(1)
16091 .sr(1)
16092 .m(m)
16093 .n(8)
16094 .k(4)
16095 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016096 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016097 }
16098 }
16099
16100 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_eq_4_subtile_n) {
16101 TEST_REQUIRES_X86_SSE2;
16102 for (uint32_t n = 1; n <= 8; n++) {
16103 GemmMicrokernelTester()
16104 .mr(1)
16105 .nr(8)
16106 .kr(1)
16107 .sr(1)
16108 .m(1)
16109 .n(n)
16110 .k(4)
16111 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016112 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016113 }
16114 }
16115
16116 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_lt_4) {
16117 TEST_REQUIRES_X86_SSE2;
16118 for (size_t k = 1; k < 4; k++) {
16119 GemmMicrokernelTester()
16120 .mr(1)
16121 .nr(8)
16122 .kr(1)
16123 .sr(1)
16124 .m(1)
16125 .n(8)
16126 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016127 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016128 }
16129 }
16130
16131 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_lt_4_subtile) {
16132 TEST_REQUIRES_X86_SSE2;
16133 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016134 for (uint32_t n = 1; n <= 8; n++) {
16135 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016136 GemmMicrokernelTester()
16137 .mr(1)
16138 .nr(8)
16139 .kr(1)
16140 .sr(1)
16141 .m(m)
16142 .n(n)
16143 .k(k)
16144 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016145 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016146 }
16147 }
16148 }
16149 }
16150
16151 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_gt_4) {
16152 TEST_REQUIRES_X86_SSE2;
16153 for (size_t k = 5; k < 8; k++) {
16154 GemmMicrokernelTester()
16155 .mr(1)
16156 .nr(8)
16157 .kr(1)
16158 .sr(1)
16159 .m(1)
16160 .n(8)
16161 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016162 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016163 }
16164 }
16165
16166 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_gt_4_subtile) {
16167 TEST_REQUIRES_X86_SSE2;
16168 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016169 for (uint32_t n = 1; n <= 8; n++) {
16170 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016171 GemmMicrokernelTester()
16172 .mr(1)
16173 .nr(8)
16174 .kr(1)
16175 .sr(1)
16176 .m(m)
16177 .n(n)
16178 .k(k)
16179 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016180 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016181 }
16182 }
16183 }
16184 }
16185
16186 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_div_4) {
16187 TEST_REQUIRES_X86_SSE2;
16188 for (size_t k = 8; k <= 40; k += 4) {
16189 GemmMicrokernelTester()
16190 .mr(1)
16191 .nr(8)
16192 .kr(1)
16193 .sr(1)
16194 .m(1)
16195 .n(8)
16196 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016197 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016198 }
16199 }
16200
16201 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, k_div_4_subtile) {
16202 TEST_REQUIRES_X86_SSE2;
16203 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016204 for (uint32_t n = 1; n <= 8; n++) {
16205 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016206 GemmMicrokernelTester()
16207 .mr(1)
16208 .nr(8)
16209 .kr(1)
16210 .sr(1)
16211 .m(m)
16212 .n(n)
16213 .k(k)
16214 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016215 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016216 }
16217 }
16218 }
16219 }
16220
16221 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_gt_8) {
16222 TEST_REQUIRES_X86_SSE2;
16223 for (uint32_t n = 9; n < 16; n++) {
16224 for (size_t k = 1; k <= 20; k += 5) {
16225 GemmMicrokernelTester()
16226 .mr(1)
16227 .nr(8)
16228 .kr(1)
16229 .sr(1)
16230 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016231 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080016232 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016233 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016234 }
16235 }
16236 }
16237
16238 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_gt_8_strided_cn) {
16239 TEST_REQUIRES_X86_SSE2;
16240 for (uint32_t n = 9; n < 16; n++) {
16241 for (size_t k = 1; k <= 20; k += 5) {
16242 GemmMicrokernelTester()
16243 .mr(1)
16244 .nr(8)
16245 .kr(1)
16246 .sr(1)
16247 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016248 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080016249 .k(k)
16250 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016251 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016252 }
16253 }
16254 }
16255
16256 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_gt_8_subtile) {
16257 TEST_REQUIRES_X86_SSE2;
16258 for (uint32_t n = 9; n < 16; n++) {
16259 for (size_t k = 1; k <= 20; k += 5) {
16260 for (uint32_t m = 1; m <= 1; m++) {
16261 GemmMicrokernelTester()
16262 .mr(1)
16263 .nr(8)
16264 .kr(1)
16265 .sr(1)
16266 .m(m)
16267 .n(n)
16268 .k(k)
16269 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016270 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016271 }
16272 }
16273 }
16274 }
16275
16276 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_div_8) {
16277 TEST_REQUIRES_X86_SSE2;
16278 for (uint32_t n = 16; n <= 24; n += 8) {
16279 for (size_t k = 1; k <= 20; k += 5) {
16280 GemmMicrokernelTester()
16281 .mr(1)
16282 .nr(8)
16283 .kr(1)
16284 .sr(1)
16285 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016286 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080016287 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016288 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016289 }
16290 }
16291 }
16292
16293 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_div_8_strided_cn) {
16294 TEST_REQUIRES_X86_SSE2;
16295 for (uint32_t n = 16; n <= 24; n += 8) {
16296 for (size_t k = 1; k <= 20; k += 5) {
16297 GemmMicrokernelTester()
16298 .mr(1)
16299 .nr(8)
16300 .kr(1)
16301 .sr(1)
16302 .m(1)
16303 .n(n)
16304 .k(k)
16305 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016306 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016307 }
16308 }
16309 }
16310
16311 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_div_8_subtile) {
16312 TEST_REQUIRES_X86_SSE2;
16313 for (uint32_t n = 16; n <= 24; n += 8) {
16314 for (size_t k = 1; k <= 20; k += 5) {
16315 for (uint32_t m = 1; m <= 1; m++) {
16316 GemmMicrokernelTester()
16317 .mr(1)
16318 .nr(8)
16319 .kr(1)
16320 .sr(1)
16321 .m(m)
16322 .n(n)
16323 .k(k)
16324 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016325 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016326 }
16327 }
16328 }
16329 }
16330
16331 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, small_kernel) {
16332 TEST_REQUIRES_X86_SSE2;
16333 for (size_t k = 1; k <= 20; k += 5) {
16334 GemmMicrokernelTester()
16335 .mr(1)
16336 .nr(8)
16337 .kr(1)
16338 .sr(1)
16339 .m(1)
16340 .n(8)
16341 .k(k)
16342 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016343 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016344 }
16345 }
16346
16347 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, small_kernel_subtile) {
16348 TEST_REQUIRES_X86_SSE2;
16349 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016350 for (uint32_t n = 1; n <= 8; n++) {
16351 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016352 GemmMicrokernelTester()
16353 .mr(1)
16354 .nr(8)
16355 .kr(1)
16356 .sr(1)
16357 .m(m)
16358 .n(n)
16359 .k(k)
16360 .ks(3)
16361 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016362 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016363 }
16364 }
16365 }
16366 }
16367
16368 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_gt_8_small_kernel) {
16369 TEST_REQUIRES_X86_SSE2;
16370 for (uint32_t n = 9; n < 16; n++) {
16371 for (size_t k = 1; k <= 20; k += 5) {
16372 GemmMicrokernelTester()
16373 .mr(1)
16374 .nr(8)
16375 .kr(1)
16376 .sr(1)
16377 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016378 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080016379 .k(k)
16380 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016381 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016382 }
16383 }
16384 }
16385
16386 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, n_div_8_small_kernel) {
16387 TEST_REQUIRES_X86_SSE2;
16388 for (uint32_t n = 16; n <= 24; n += 8) {
16389 for (size_t k = 1; k <= 20; k += 5) {
16390 GemmMicrokernelTester()
16391 .mr(1)
16392 .nr(8)
16393 .kr(1)
16394 .sr(1)
16395 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016396 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080016397 .k(k)
16398 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016399 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016400 }
16401 }
16402 }
16403
16404 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, strided_cm_subtile) {
16405 TEST_REQUIRES_X86_SSE2;
16406 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016407 for (uint32_t n = 1; n <= 8; n++) {
16408 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016409 GemmMicrokernelTester()
16410 .mr(1)
16411 .nr(8)
16412 .kr(1)
16413 .sr(1)
16414 .m(m)
16415 .n(n)
16416 .k(k)
16417 .cm_stride(11)
16418 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016419 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016420 }
16421 }
16422 }
16423 }
16424
16425 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, a_offset) {
16426 TEST_REQUIRES_X86_SSE2;
16427 for (size_t k = 1; k <= 20; k += 5) {
16428 GemmMicrokernelTester()
16429 .mr(1)
16430 .nr(8)
16431 .kr(1)
16432 .sr(1)
16433 .m(1)
16434 .n(8)
16435 .k(k)
16436 .ks(3)
16437 .a_offset(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016438 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016439 }
16440 }
16441
16442 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, zero) {
16443 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016444 for (size_t k = 1; k <= 20; k += 5) {
16445 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016446 GemmMicrokernelTester()
16447 .mr(1)
16448 .nr(8)
16449 .kr(1)
16450 .sr(1)
16451 .m(1)
16452 .n(8)
16453 .k(k)
16454 .ks(3)
16455 .a_offset(23)
16456 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016457 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016458 }
16459 }
16460 }
16461
16462 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, qmin) {
16463 TEST_REQUIRES_X86_SSE2;
16464 GemmMicrokernelTester()
16465 .mr(1)
16466 .nr(8)
16467 .kr(1)
16468 .sr(1)
16469 .m(1)
16470 .n(8)
16471 .k(4)
16472 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016473 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016474 }
16475
16476 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, qmax) {
16477 TEST_REQUIRES_X86_SSE2;
16478 GemmMicrokernelTester()
16479 .mr(1)
16480 .nr(8)
16481 .kr(1)
16482 .sr(1)
16483 .m(1)
16484 .n(8)
16485 .k(4)
16486 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016487 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016488 }
16489
16490 TEST(F32_IGEMM_MINMAX_1X8__SSE2_DUP, strided_cm) {
16491 TEST_REQUIRES_X86_SSE2;
16492 GemmMicrokernelTester()
16493 .mr(1)
16494 .nr(8)
16495 .kr(1)
16496 .sr(1)
16497 .m(1)
16498 .n(8)
16499 .k(4)
16500 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016501 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016502 }
16503#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16504
16505
16506#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -080016507 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_eq_4) {
16508 TEST_REQUIRES_X86_SSE2;
16509 GemmMicrokernelTester()
16510 .mr(5)
16511 .nr(8)
16512 .kr(1)
16513 .sr(1)
16514 .m(5)
16515 .n(8)
16516 .k(4)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016517 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016518 }
16519
16520 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, strided_cn) {
16521 TEST_REQUIRES_X86_SSE2;
16522 GemmMicrokernelTester()
16523 .mr(5)
16524 .nr(8)
16525 .kr(1)
16526 .sr(1)
16527 .m(5)
16528 .n(8)
16529 .k(4)
16530 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016531 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016532 }
16533
16534 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile) {
16535 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016536 for (uint32_t n = 1; n <= 8; n++) {
16537 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016538 GemmMicrokernelTester()
16539 .mr(5)
16540 .nr(8)
16541 .kr(1)
16542 .sr(1)
16543 .m(m)
16544 .n(n)
16545 .k(4)
16546 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016547 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016548 }
16549 }
16550 }
16551
16552 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile_m) {
16553 TEST_REQUIRES_X86_SSE2;
16554 for (uint32_t m = 1; m <= 5; m++) {
16555 GemmMicrokernelTester()
16556 .mr(5)
16557 .nr(8)
16558 .kr(1)
16559 .sr(1)
16560 .m(m)
16561 .n(8)
16562 .k(4)
16563 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016564 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016565 }
16566 }
16567
16568 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_eq_4_subtile_n) {
16569 TEST_REQUIRES_X86_SSE2;
16570 for (uint32_t n = 1; n <= 8; n++) {
16571 GemmMicrokernelTester()
16572 .mr(5)
16573 .nr(8)
16574 .kr(1)
16575 .sr(1)
16576 .m(5)
16577 .n(n)
16578 .k(4)
16579 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016580 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016581 }
16582 }
16583
16584 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_lt_4) {
16585 TEST_REQUIRES_X86_SSE2;
16586 for (size_t k = 1; k < 4; k++) {
16587 GemmMicrokernelTester()
16588 .mr(5)
16589 .nr(8)
16590 .kr(1)
16591 .sr(1)
16592 .m(5)
16593 .n(8)
16594 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016595 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016596 }
16597 }
16598
16599 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_lt_4_subtile) {
16600 TEST_REQUIRES_X86_SSE2;
16601 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016602 for (uint32_t n = 1; n <= 8; n++) {
16603 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016604 GemmMicrokernelTester()
16605 .mr(5)
16606 .nr(8)
16607 .kr(1)
16608 .sr(1)
16609 .m(m)
16610 .n(n)
16611 .k(k)
16612 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016613 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016614 }
16615 }
16616 }
16617 }
16618
16619 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_gt_4) {
16620 TEST_REQUIRES_X86_SSE2;
16621 for (size_t k = 5; k < 8; k++) {
16622 GemmMicrokernelTester()
16623 .mr(5)
16624 .nr(8)
16625 .kr(1)
16626 .sr(1)
16627 .m(5)
16628 .n(8)
16629 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016630 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016631 }
16632 }
16633
16634 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_gt_4_subtile) {
16635 TEST_REQUIRES_X86_SSE2;
16636 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016637 for (uint32_t n = 1; n <= 8; n++) {
16638 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016639 GemmMicrokernelTester()
16640 .mr(5)
16641 .nr(8)
16642 .kr(1)
16643 .sr(1)
16644 .m(m)
16645 .n(n)
16646 .k(k)
16647 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016648 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016649 }
16650 }
16651 }
16652 }
16653
16654 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_div_4) {
16655 TEST_REQUIRES_X86_SSE2;
16656 for (size_t k = 8; k <= 40; k += 4) {
16657 GemmMicrokernelTester()
16658 .mr(5)
16659 .nr(8)
16660 .kr(1)
16661 .sr(1)
16662 .m(5)
16663 .n(8)
16664 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016665 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016666 }
16667 }
16668
16669 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, k_div_4_subtile) {
16670 TEST_REQUIRES_X86_SSE2;
16671 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016672 for (uint32_t n = 1; n <= 8; n++) {
16673 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016674 GemmMicrokernelTester()
16675 .mr(5)
16676 .nr(8)
16677 .kr(1)
16678 .sr(1)
16679 .m(m)
16680 .n(n)
16681 .k(k)
16682 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016683 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016684 }
16685 }
16686 }
16687 }
16688
16689 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_gt_8) {
16690 TEST_REQUIRES_X86_SSE2;
16691 for (uint32_t n = 9; n < 16; n++) {
16692 for (size_t k = 1; k <= 20; k += 5) {
16693 GemmMicrokernelTester()
16694 .mr(5)
16695 .nr(8)
16696 .kr(1)
16697 .sr(1)
16698 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016699 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080016700 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016701 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016702 }
16703 }
16704 }
16705
16706 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_gt_8_strided_cn) {
16707 TEST_REQUIRES_X86_SSE2;
16708 for (uint32_t n = 9; n < 16; n++) {
16709 for (size_t k = 1; k <= 20; k += 5) {
16710 GemmMicrokernelTester()
16711 .mr(5)
16712 .nr(8)
16713 .kr(1)
16714 .sr(1)
16715 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016716 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080016717 .k(k)
16718 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016719 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016720 }
16721 }
16722 }
16723
16724 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_gt_8_subtile) {
16725 TEST_REQUIRES_X86_SSE2;
16726 for (uint32_t n = 9; n < 16; n++) {
16727 for (size_t k = 1; k <= 20; k += 5) {
16728 for (uint32_t m = 1; m <= 5; m++) {
16729 GemmMicrokernelTester()
16730 .mr(5)
16731 .nr(8)
16732 .kr(1)
16733 .sr(1)
16734 .m(m)
16735 .n(n)
16736 .k(k)
16737 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016738 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016739 }
16740 }
16741 }
16742 }
16743
16744 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_div_8) {
16745 TEST_REQUIRES_X86_SSE2;
16746 for (uint32_t n = 16; n <= 24; n += 8) {
16747 for (size_t k = 1; k <= 20; k += 5) {
16748 GemmMicrokernelTester()
16749 .mr(5)
16750 .nr(8)
16751 .kr(1)
16752 .sr(1)
16753 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016754 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080016755 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016756 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016757 }
16758 }
16759 }
16760
16761 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_div_8_strided_cn) {
16762 TEST_REQUIRES_X86_SSE2;
16763 for (uint32_t n = 16; n <= 24; n += 8) {
16764 for (size_t k = 1; k <= 20; k += 5) {
16765 GemmMicrokernelTester()
16766 .mr(5)
16767 .nr(8)
16768 .kr(1)
16769 .sr(1)
16770 .m(5)
16771 .n(n)
16772 .k(k)
16773 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016774 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016775 }
16776 }
16777 }
16778
16779 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_div_8_subtile) {
16780 TEST_REQUIRES_X86_SSE2;
16781 for (uint32_t n = 16; n <= 24; n += 8) {
16782 for (size_t k = 1; k <= 20; k += 5) {
16783 for (uint32_t m = 1; m <= 5; m++) {
16784 GemmMicrokernelTester()
16785 .mr(5)
16786 .nr(8)
16787 .kr(1)
16788 .sr(1)
16789 .m(m)
16790 .n(n)
16791 .k(k)
16792 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016793 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016794 }
16795 }
16796 }
16797 }
16798
16799 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, small_kernel) {
16800 TEST_REQUIRES_X86_SSE2;
16801 for (size_t k = 1; k <= 20; k += 5) {
16802 GemmMicrokernelTester()
16803 .mr(5)
16804 .nr(8)
16805 .kr(1)
16806 .sr(1)
16807 .m(5)
16808 .n(8)
16809 .k(k)
16810 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016811 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016812 }
16813 }
16814
16815 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, small_kernel_subtile) {
16816 TEST_REQUIRES_X86_SSE2;
16817 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016818 for (uint32_t n = 1; n <= 8; n++) {
16819 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016820 GemmMicrokernelTester()
16821 .mr(5)
16822 .nr(8)
16823 .kr(1)
16824 .sr(1)
16825 .m(m)
16826 .n(n)
16827 .k(k)
16828 .ks(3)
16829 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016830 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016831 }
16832 }
16833 }
16834 }
16835
16836 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_gt_8_small_kernel) {
16837 TEST_REQUIRES_X86_SSE2;
16838 for (uint32_t n = 9; n < 16; n++) {
16839 for (size_t k = 1; k <= 20; k += 5) {
16840 GemmMicrokernelTester()
16841 .mr(5)
16842 .nr(8)
16843 .kr(1)
16844 .sr(1)
16845 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016846 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080016847 .k(k)
16848 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016849 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016850 }
16851 }
16852 }
16853
16854 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, n_div_8_small_kernel) {
16855 TEST_REQUIRES_X86_SSE2;
16856 for (uint32_t n = 16; n <= 24; n += 8) {
16857 for (size_t k = 1; k <= 20; k += 5) {
16858 GemmMicrokernelTester()
16859 .mr(5)
16860 .nr(8)
16861 .kr(1)
16862 .sr(1)
16863 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016864 .n(n)
Marat Dukhan802fcae2020-12-11 14:37:25 -080016865 .k(k)
16866 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016867 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016868 }
16869 }
16870 }
16871
16872 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, strided_cm_subtile) {
16873 TEST_REQUIRES_X86_SSE2;
16874 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016875 for (uint32_t n = 1; n <= 8; n++) {
16876 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016877 GemmMicrokernelTester()
16878 .mr(5)
16879 .nr(8)
16880 .kr(1)
16881 .sr(1)
16882 .m(m)
16883 .n(n)
16884 .k(k)
16885 .cm_stride(11)
16886 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016887 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016888 }
16889 }
16890 }
16891 }
16892
16893 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, a_offset) {
16894 TEST_REQUIRES_X86_SSE2;
16895 for (size_t k = 1; k <= 20; k += 5) {
16896 GemmMicrokernelTester()
16897 .mr(5)
16898 .nr(8)
16899 .kr(1)
16900 .sr(1)
16901 .m(5)
16902 .n(8)
16903 .k(k)
16904 .ks(3)
16905 .a_offset(103)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016906 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016907 }
16908 }
16909
16910 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, zero) {
16911 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016912 for (size_t k = 1; k <= 20; k += 5) {
16913 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan802fcae2020-12-11 14:37:25 -080016914 GemmMicrokernelTester()
16915 .mr(5)
16916 .nr(8)
16917 .kr(1)
16918 .sr(1)
16919 .m(5)
16920 .n(8)
16921 .k(k)
16922 .ks(3)
16923 .a_offset(103)
16924 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016925 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016926 }
16927 }
16928 }
16929
16930 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, qmin) {
16931 TEST_REQUIRES_X86_SSE2;
16932 GemmMicrokernelTester()
16933 .mr(5)
16934 .nr(8)
16935 .kr(1)
16936 .sr(1)
16937 .m(5)
16938 .n(8)
16939 .k(4)
16940 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016941 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016942 }
16943
16944 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, qmax) {
16945 TEST_REQUIRES_X86_SSE2;
16946 GemmMicrokernelTester()
16947 .mr(5)
16948 .nr(8)
16949 .kr(1)
16950 .sr(1)
16951 .m(5)
16952 .n(8)
16953 .k(4)
16954 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016955 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016956 }
16957
16958 TEST(F32_IGEMM_MINMAX_5X8__SSE2_DUP, strided_cm) {
16959 TEST_REQUIRES_X86_SSE2;
16960 GemmMicrokernelTester()
16961 .mr(5)
16962 .nr(8)
16963 .kr(1)
16964 .sr(1)
16965 .m(5)
16966 .n(8)
16967 .k(4)
16968 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016969 .Test(xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -080016970 }
16971#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16972
16973
16974#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070016975 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016976 TEST_REQUIRES_X86_AVX;
16977 GemmMicrokernelTester()
16978 .mr(1)
16979 .nr(8)
16980 .kr(1)
16981 .sr(1)
16982 .m(1)
16983 .n(8)
16984 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016985 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070016986 }
16987
Marat Dukhande06f492020-04-09 00:19:31 -070016988 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070016989 TEST_REQUIRES_X86_AVX;
16990 GemmMicrokernelTester()
16991 .mr(1)
16992 .nr(8)
16993 .kr(1)
16994 .sr(1)
16995 .m(1)
16996 .n(8)
16997 .k(1)
16998 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070016999 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017000 }
17001
Marat Dukhande06f492020-04-09 00:19:31 -070017002 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017003 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017004 for (uint32_t n = 1; n <= 8; n++) {
17005 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017006 GemmMicrokernelTester()
17007 .mr(1)
17008 .nr(8)
17009 .kr(1)
17010 .sr(1)
17011 .m(m)
17012 .n(n)
17013 .k(1)
17014 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017015 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017016 }
17017 }
17018 }
17019
Marat Dukhande06f492020-04-09 00:19:31 -070017020 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017021 TEST_REQUIRES_X86_AVX;
17022 for (uint32_t m = 1; m <= 1; m++) {
17023 GemmMicrokernelTester()
17024 .mr(1)
17025 .nr(8)
17026 .kr(1)
17027 .sr(1)
17028 .m(m)
17029 .n(8)
17030 .k(1)
17031 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017032 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017033 }
17034 }
17035
Marat Dukhande06f492020-04-09 00:19:31 -070017036 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017037 TEST_REQUIRES_X86_AVX;
17038 for (uint32_t n = 1; n <= 8; n++) {
17039 GemmMicrokernelTester()
17040 .mr(1)
17041 .nr(8)
17042 .kr(1)
17043 .sr(1)
17044 .m(1)
17045 .n(n)
17046 .k(1)
17047 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017048 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017049 }
17050 }
17051
Marat Dukhande06f492020-04-09 00:19:31 -070017052 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017053 TEST_REQUIRES_X86_AVX;
17054 for (size_t k = 2; k < 10; k++) {
17055 GemmMicrokernelTester()
17056 .mr(1)
17057 .nr(8)
17058 .kr(1)
17059 .sr(1)
17060 .m(1)
17061 .n(8)
17062 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017063 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017064 }
17065 }
17066
Marat Dukhande06f492020-04-09 00:19:31 -070017067 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017068 TEST_REQUIRES_X86_AVX;
17069 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017070 for (uint32_t n = 1; n <= 8; n++) {
17071 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017072 GemmMicrokernelTester()
17073 .mr(1)
17074 .nr(8)
17075 .kr(1)
17076 .sr(1)
17077 .m(m)
17078 .n(n)
17079 .k(k)
17080 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017081 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017082 }
17083 }
17084 }
17085 }
17086
Marat Dukhande06f492020-04-09 00:19:31 -070017087 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017088 TEST_REQUIRES_X86_AVX;
17089 for (uint32_t n = 9; n < 16; n++) {
17090 for (size_t k = 1; k <= 5; k += 2) {
17091 GemmMicrokernelTester()
17092 .mr(1)
17093 .nr(8)
17094 .kr(1)
17095 .sr(1)
17096 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017097 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017098 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017099 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017100 }
17101 }
17102 }
17103
Marat Dukhande06f492020-04-09 00:19:31 -070017104 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017105 TEST_REQUIRES_X86_AVX;
17106 for (uint32_t n = 9; n < 16; n++) {
17107 for (size_t k = 1; k <= 5; k += 2) {
17108 GemmMicrokernelTester()
17109 .mr(1)
17110 .nr(8)
17111 .kr(1)
17112 .sr(1)
17113 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017114 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017115 .k(k)
17116 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017117 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017118 }
17119 }
17120 }
17121
Marat Dukhande06f492020-04-09 00:19:31 -070017122 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017123 TEST_REQUIRES_X86_AVX;
17124 for (uint32_t n = 9; n < 16; n++) {
17125 for (size_t k = 1; k <= 5; k += 2) {
17126 for (uint32_t m = 1; m <= 1; m++) {
17127 GemmMicrokernelTester()
17128 .mr(1)
17129 .nr(8)
17130 .kr(1)
17131 .sr(1)
17132 .m(m)
17133 .n(n)
17134 .k(k)
17135 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017136 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017137 }
17138 }
17139 }
17140 }
17141
Marat Dukhande06f492020-04-09 00:19:31 -070017142 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017143 TEST_REQUIRES_X86_AVX;
17144 for (uint32_t n = 16; n <= 24; n += 8) {
17145 for (size_t k = 1; k <= 5; k += 2) {
17146 GemmMicrokernelTester()
17147 .mr(1)
17148 .nr(8)
17149 .kr(1)
17150 .sr(1)
17151 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017152 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017153 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017154 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017155 }
17156 }
17157 }
17158
Marat Dukhande06f492020-04-09 00:19:31 -070017159 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017160 TEST_REQUIRES_X86_AVX;
17161 for (uint32_t n = 16; n <= 24; n += 8) {
17162 for (size_t k = 1; k <= 5; k += 2) {
17163 GemmMicrokernelTester()
17164 .mr(1)
17165 .nr(8)
17166 .kr(1)
17167 .sr(1)
17168 .m(1)
17169 .n(n)
17170 .k(k)
17171 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017172 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017173 }
17174 }
17175 }
17176
Marat Dukhande06f492020-04-09 00:19:31 -070017177 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017178 TEST_REQUIRES_X86_AVX;
17179 for (uint32_t n = 16; n <= 24; n += 8) {
17180 for (size_t k = 1; k <= 5; k += 2) {
17181 for (uint32_t m = 1; m <= 1; m++) {
17182 GemmMicrokernelTester()
17183 .mr(1)
17184 .nr(8)
17185 .kr(1)
17186 .sr(1)
17187 .m(m)
17188 .n(n)
17189 .k(k)
17190 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017191 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017192 }
17193 }
17194 }
17195 }
17196
Marat Dukhande06f492020-04-09 00:19:31 -070017197 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017198 TEST_REQUIRES_X86_AVX;
17199 for (size_t k = 1; k <= 5; k += 2) {
17200 GemmMicrokernelTester()
17201 .mr(1)
17202 .nr(8)
17203 .kr(1)
17204 .sr(1)
17205 .m(1)
17206 .n(8)
17207 .k(k)
17208 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017209 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017210 }
17211 }
17212
Marat Dukhande06f492020-04-09 00:19:31 -070017213 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017214 TEST_REQUIRES_X86_AVX;
17215 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017216 for (uint32_t n = 1; n <= 8; n++) {
17217 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017218 GemmMicrokernelTester()
17219 .mr(1)
17220 .nr(8)
17221 .kr(1)
17222 .sr(1)
17223 .m(m)
17224 .n(n)
17225 .k(k)
17226 .ks(3)
17227 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017228 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017229 }
17230 }
17231 }
17232 }
17233
Marat Dukhande06f492020-04-09 00:19:31 -070017234 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017235 TEST_REQUIRES_X86_AVX;
17236 for (uint32_t n = 9; n < 16; n++) {
17237 for (size_t k = 1; k <= 5; k += 2) {
17238 GemmMicrokernelTester()
17239 .mr(1)
17240 .nr(8)
17241 .kr(1)
17242 .sr(1)
17243 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017244 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017245 .k(k)
17246 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017247 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017248 }
17249 }
17250 }
17251
Marat Dukhande06f492020-04-09 00:19:31 -070017252 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017253 TEST_REQUIRES_X86_AVX;
17254 for (uint32_t n = 16; n <= 24; n += 8) {
17255 for (size_t k = 1; k <= 5; k += 2) {
17256 GemmMicrokernelTester()
17257 .mr(1)
17258 .nr(8)
17259 .kr(1)
17260 .sr(1)
17261 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017262 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017263 .k(k)
17264 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017265 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017266 }
17267 }
17268 }
17269
Marat Dukhande06f492020-04-09 00:19:31 -070017270 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017271 TEST_REQUIRES_X86_AVX;
17272 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017273 for (uint32_t n = 1; n <= 8; n++) {
17274 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017275 GemmMicrokernelTester()
17276 .mr(1)
17277 .nr(8)
17278 .kr(1)
17279 .sr(1)
17280 .m(m)
17281 .n(n)
17282 .k(k)
17283 .cm_stride(11)
17284 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017285 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017286 }
17287 }
17288 }
17289 }
17290
Marat Dukhande06f492020-04-09 00:19:31 -070017291 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017292 TEST_REQUIRES_X86_AVX;
17293 for (size_t k = 1; k <= 5; k += 2) {
17294 GemmMicrokernelTester()
17295 .mr(1)
17296 .nr(8)
17297 .kr(1)
17298 .sr(1)
17299 .m(1)
17300 .n(8)
17301 .k(k)
17302 .ks(3)
17303 .a_offset(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017304 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017305 }
17306 }
17307
Marat Dukhande06f492020-04-09 00:19:31 -070017308 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017309 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017310 for (size_t k = 1; k <= 5; k += 2) {
17311 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017312 GemmMicrokernelTester()
17313 .mr(1)
17314 .nr(8)
17315 .kr(1)
17316 .sr(1)
17317 .m(1)
17318 .n(8)
17319 .k(k)
17320 .ks(3)
17321 .a_offset(7)
17322 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017323 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017324 }
17325 }
17326 }
17327
Marat Dukhande06f492020-04-09 00:19:31 -070017328 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017329 TEST_REQUIRES_X86_AVX;
17330 GemmMicrokernelTester()
17331 .mr(1)
17332 .nr(8)
17333 .kr(1)
17334 .sr(1)
17335 .m(1)
17336 .n(8)
17337 .k(1)
17338 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017339 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017340 }
17341
Marat Dukhande06f492020-04-09 00:19:31 -070017342 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017343 TEST_REQUIRES_X86_AVX;
17344 GemmMicrokernelTester()
17345 .mr(1)
17346 .nr(8)
17347 .kr(1)
17348 .sr(1)
17349 .m(1)
17350 .n(8)
17351 .k(1)
17352 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017353 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017354 }
17355
Marat Dukhande06f492020-04-09 00:19:31 -070017356 TEST(F32_IGEMM_MINMAX_1X8__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017357 TEST_REQUIRES_X86_AVX;
17358 GemmMicrokernelTester()
17359 .mr(1)
17360 .nr(8)
17361 .kr(1)
17362 .sr(1)
17363 .m(1)
17364 .n(8)
17365 .k(1)
17366 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017367 .Test(xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017368 }
17369#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17370
17371
17372#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070017373 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017374 TEST_REQUIRES_X86_AVX;
17375 GemmMicrokernelTester()
17376 .mr(5)
17377 .nr(8)
17378 .kr(1)
17379 .sr(1)
17380 .m(5)
17381 .n(8)
17382 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017383 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017384 }
17385
Marat Dukhande06f492020-04-09 00:19:31 -070017386 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017387 TEST_REQUIRES_X86_AVX;
17388 GemmMicrokernelTester()
17389 .mr(5)
17390 .nr(8)
17391 .kr(1)
17392 .sr(1)
17393 .m(5)
17394 .n(8)
17395 .k(1)
17396 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017397 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017398 }
17399
Marat Dukhande06f492020-04-09 00:19:31 -070017400 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017401 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017402 for (uint32_t n = 1; n <= 8; n++) {
17403 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017404 GemmMicrokernelTester()
17405 .mr(5)
17406 .nr(8)
17407 .kr(1)
17408 .sr(1)
17409 .m(m)
17410 .n(n)
17411 .k(1)
17412 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017413 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017414 }
17415 }
17416 }
17417
Marat Dukhande06f492020-04-09 00:19:31 -070017418 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017419 TEST_REQUIRES_X86_AVX;
17420 for (uint32_t m = 1; m <= 5; m++) {
17421 GemmMicrokernelTester()
17422 .mr(5)
17423 .nr(8)
17424 .kr(1)
17425 .sr(1)
17426 .m(m)
17427 .n(8)
17428 .k(1)
17429 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017430 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017431 }
17432 }
17433
Marat Dukhande06f492020-04-09 00:19:31 -070017434 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017435 TEST_REQUIRES_X86_AVX;
17436 for (uint32_t n = 1; n <= 8; n++) {
17437 GemmMicrokernelTester()
17438 .mr(5)
17439 .nr(8)
17440 .kr(1)
17441 .sr(1)
17442 .m(5)
17443 .n(n)
17444 .k(1)
17445 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017446 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017447 }
17448 }
17449
Marat Dukhande06f492020-04-09 00:19:31 -070017450 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017451 TEST_REQUIRES_X86_AVX;
17452 for (size_t k = 2; k < 10; k++) {
17453 GemmMicrokernelTester()
17454 .mr(5)
17455 .nr(8)
17456 .kr(1)
17457 .sr(1)
17458 .m(5)
17459 .n(8)
17460 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017461 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017462 }
17463 }
17464
Marat Dukhande06f492020-04-09 00:19:31 -070017465 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017466 TEST_REQUIRES_X86_AVX;
17467 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017468 for (uint32_t n = 1; n <= 8; n++) {
17469 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017470 GemmMicrokernelTester()
17471 .mr(5)
17472 .nr(8)
17473 .kr(1)
17474 .sr(1)
17475 .m(m)
17476 .n(n)
17477 .k(k)
17478 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017479 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017480 }
17481 }
17482 }
17483 }
17484
Marat Dukhande06f492020-04-09 00:19:31 -070017485 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017486 TEST_REQUIRES_X86_AVX;
17487 for (uint32_t n = 9; n < 16; n++) {
17488 for (size_t k = 1; k <= 5; k += 2) {
17489 GemmMicrokernelTester()
17490 .mr(5)
17491 .nr(8)
17492 .kr(1)
17493 .sr(1)
17494 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017495 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017496 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017497 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017498 }
17499 }
17500 }
17501
Marat Dukhande06f492020-04-09 00:19:31 -070017502 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017503 TEST_REQUIRES_X86_AVX;
17504 for (uint32_t n = 9; n < 16; n++) {
17505 for (size_t k = 1; k <= 5; k += 2) {
17506 GemmMicrokernelTester()
17507 .mr(5)
17508 .nr(8)
17509 .kr(1)
17510 .sr(1)
17511 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017512 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017513 .k(k)
17514 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017515 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017516 }
17517 }
17518 }
17519
Marat Dukhande06f492020-04-09 00:19:31 -070017520 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017521 TEST_REQUIRES_X86_AVX;
17522 for (uint32_t n = 9; n < 16; n++) {
17523 for (size_t k = 1; k <= 5; k += 2) {
17524 for (uint32_t m = 1; m <= 5; m++) {
17525 GemmMicrokernelTester()
17526 .mr(5)
17527 .nr(8)
17528 .kr(1)
17529 .sr(1)
17530 .m(m)
17531 .n(n)
17532 .k(k)
17533 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017534 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017535 }
17536 }
17537 }
17538 }
17539
Marat Dukhande06f492020-04-09 00:19:31 -070017540 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017541 TEST_REQUIRES_X86_AVX;
17542 for (uint32_t n = 16; n <= 24; n += 8) {
17543 for (size_t k = 1; k <= 5; k += 2) {
17544 GemmMicrokernelTester()
17545 .mr(5)
17546 .nr(8)
17547 .kr(1)
17548 .sr(1)
17549 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017550 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017551 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017552 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017553 }
17554 }
17555 }
17556
Marat Dukhande06f492020-04-09 00:19:31 -070017557 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017558 TEST_REQUIRES_X86_AVX;
17559 for (uint32_t n = 16; n <= 24; n += 8) {
17560 for (size_t k = 1; k <= 5; k += 2) {
17561 GemmMicrokernelTester()
17562 .mr(5)
17563 .nr(8)
17564 .kr(1)
17565 .sr(1)
17566 .m(5)
17567 .n(n)
17568 .k(k)
17569 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017570 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017571 }
17572 }
17573 }
17574
Marat Dukhande06f492020-04-09 00:19:31 -070017575 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017576 TEST_REQUIRES_X86_AVX;
17577 for (uint32_t n = 16; n <= 24; n += 8) {
17578 for (size_t k = 1; k <= 5; k += 2) {
17579 for (uint32_t m = 1; m <= 5; m++) {
17580 GemmMicrokernelTester()
17581 .mr(5)
17582 .nr(8)
17583 .kr(1)
17584 .sr(1)
17585 .m(m)
17586 .n(n)
17587 .k(k)
17588 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017589 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017590 }
17591 }
17592 }
17593 }
17594
Marat Dukhande06f492020-04-09 00:19:31 -070017595 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017596 TEST_REQUIRES_X86_AVX;
17597 for (size_t k = 1; k <= 5; k += 2) {
17598 GemmMicrokernelTester()
17599 .mr(5)
17600 .nr(8)
17601 .kr(1)
17602 .sr(1)
17603 .m(5)
17604 .n(8)
17605 .k(k)
17606 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017607 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017608 }
17609 }
17610
Marat Dukhande06f492020-04-09 00:19:31 -070017611 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017612 TEST_REQUIRES_X86_AVX;
17613 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017614 for (uint32_t n = 1; n <= 8; n++) {
17615 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017616 GemmMicrokernelTester()
17617 .mr(5)
17618 .nr(8)
17619 .kr(1)
17620 .sr(1)
17621 .m(m)
17622 .n(n)
17623 .k(k)
17624 .ks(3)
17625 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017626 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017627 }
17628 }
17629 }
17630 }
17631
Marat Dukhande06f492020-04-09 00:19:31 -070017632 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017633 TEST_REQUIRES_X86_AVX;
17634 for (uint32_t n = 9; n < 16; n++) {
17635 for (size_t k = 1; k <= 5; k += 2) {
17636 GemmMicrokernelTester()
17637 .mr(5)
17638 .nr(8)
17639 .kr(1)
17640 .sr(1)
17641 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017642 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017643 .k(k)
17644 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017645 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017646 }
17647 }
17648 }
17649
Marat Dukhande06f492020-04-09 00:19:31 -070017650 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017651 TEST_REQUIRES_X86_AVX;
17652 for (uint32_t n = 16; n <= 24; n += 8) {
17653 for (size_t k = 1; k <= 5; k += 2) {
17654 GemmMicrokernelTester()
17655 .mr(5)
17656 .nr(8)
17657 .kr(1)
17658 .sr(1)
17659 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017660 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017661 .k(k)
17662 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017663 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017664 }
17665 }
17666 }
17667
Marat Dukhande06f492020-04-09 00:19:31 -070017668 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017669 TEST_REQUIRES_X86_AVX;
17670 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017671 for (uint32_t n = 1; n <= 8; n++) {
17672 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017673 GemmMicrokernelTester()
17674 .mr(5)
17675 .nr(8)
17676 .kr(1)
17677 .sr(1)
17678 .m(m)
17679 .n(n)
17680 .k(k)
17681 .cm_stride(11)
17682 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017683 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017684 }
17685 }
17686 }
17687 }
17688
Marat Dukhande06f492020-04-09 00:19:31 -070017689 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017690 TEST_REQUIRES_X86_AVX;
17691 for (size_t k = 1; k <= 5; k += 2) {
17692 GemmMicrokernelTester()
17693 .mr(5)
17694 .nr(8)
17695 .kr(1)
17696 .sr(1)
17697 .m(5)
17698 .n(8)
17699 .k(k)
17700 .ks(3)
17701 .a_offset(29)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017702 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017703 }
17704 }
17705
Marat Dukhande06f492020-04-09 00:19:31 -070017706 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017707 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017708 for (size_t k = 1; k <= 5; k += 2) {
17709 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017710 GemmMicrokernelTester()
17711 .mr(5)
17712 .nr(8)
17713 .kr(1)
17714 .sr(1)
17715 .m(5)
17716 .n(8)
17717 .k(k)
17718 .ks(3)
17719 .a_offset(29)
17720 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017721 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017722 }
17723 }
17724 }
17725
Marat Dukhande06f492020-04-09 00:19:31 -070017726 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017727 TEST_REQUIRES_X86_AVX;
17728 GemmMicrokernelTester()
17729 .mr(5)
17730 .nr(8)
17731 .kr(1)
17732 .sr(1)
17733 .m(5)
17734 .n(8)
17735 .k(1)
17736 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017737 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017738 }
17739
Marat Dukhande06f492020-04-09 00:19:31 -070017740 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017741 TEST_REQUIRES_X86_AVX;
17742 GemmMicrokernelTester()
17743 .mr(5)
17744 .nr(8)
17745 .kr(1)
17746 .sr(1)
17747 .m(5)
17748 .n(8)
17749 .k(1)
17750 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017751 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017752 }
17753
Marat Dukhande06f492020-04-09 00:19:31 -070017754 TEST(F32_IGEMM_MINMAX_5X8__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017755 TEST_REQUIRES_X86_AVX;
17756 GemmMicrokernelTester()
17757 .mr(5)
17758 .nr(8)
17759 .kr(1)
17760 .sr(1)
17761 .m(5)
17762 .n(8)
17763 .k(1)
17764 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017765 .Test(xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017766 }
17767#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17768
17769
17770#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070017771 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017772 TEST_REQUIRES_X86_AVX;
17773 GemmMicrokernelTester()
17774 .mr(6)
17775 .nr(8)
17776 .kr(1)
17777 .sr(1)
17778 .m(6)
17779 .n(8)
17780 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017781 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017782 }
17783
Marat Dukhande06f492020-04-09 00:19:31 -070017784 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017785 TEST_REQUIRES_X86_AVX;
17786 GemmMicrokernelTester()
17787 .mr(6)
17788 .nr(8)
17789 .kr(1)
17790 .sr(1)
17791 .m(6)
17792 .n(8)
17793 .k(1)
17794 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017795 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017796 }
17797
Marat Dukhande06f492020-04-09 00:19:31 -070017798 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017799 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017800 for (uint32_t n = 1; n <= 8; n++) {
17801 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017802 GemmMicrokernelTester()
17803 .mr(6)
17804 .nr(8)
17805 .kr(1)
17806 .sr(1)
17807 .m(m)
17808 .n(n)
17809 .k(1)
17810 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017811 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017812 }
17813 }
17814 }
17815
Marat Dukhande06f492020-04-09 00:19:31 -070017816 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017817 TEST_REQUIRES_X86_AVX;
17818 for (uint32_t m = 1; m <= 6; m++) {
17819 GemmMicrokernelTester()
17820 .mr(6)
17821 .nr(8)
17822 .kr(1)
17823 .sr(1)
17824 .m(m)
17825 .n(8)
17826 .k(1)
17827 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017828 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017829 }
17830 }
17831
Marat Dukhande06f492020-04-09 00:19:31 -070017832 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017833 TEST_REQUIRES_X86_AVX;
17834 for (uint32_t n = 1; n <= 8; n++) {
17835 GemmMicrokernelTester()
17836 .mr(6)
17837 .nr(8)
17838 .kr(1)
17839 .sr(1)
17840 .m(6)
17841 .n(n)
17842 .k(1)
17843 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017844 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017845 }
17846 }
17847
Marat Dukhande06f492020-04-09 00:19:31 -070017848 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017849 TEST_REQUIRES_X86_AVX;
17850 for (size_t k = 2; k < 10; k++) {
17851 GemmMicrokernelTester()
17852 .mr(6)
17853 .nr(8)
17854 .kr(1)
17855 .sr(1)
17856 .m(6)
17857 .n(8)
17858 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017859 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017860 }
17861 }
17862
Marat Dukhande06f492020-04-09 00:19:31 -070017863 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017864 TEST_REQUIRES_X86_AVX;
17865 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017866 for (uint32_t n = 1; n <= 8; n++) {
17867 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017868 GemmMicrokernelTester()
17869 .mr(6)
17870 .nr(8)
17871 .kr(1)
17872 .sr(1)
17873 .m(m)
17874 .n(n)
17875 .k(k)
17876 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017877 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017878 }
17879 }
17880 }
17881 }
17882
Marat Dukhande06f492020-04-09 00:19:31 -070017883 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017884 TEST_REQUIRES_X86_AVX;
17885 for (uint32_t n = 9; n < 16; n++) {
17886 for (size_t k = 1; k <= 5; k += 2) {
17887 GemmMicrokernelTester()
17888 .mr(6)
17889 .nr(8)
17890 .kr(1)
17891 .sr(1)
17892 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017893 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017894 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017895 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017896 }
17897 }
17898 }
17899
Marat Dukhande06f492020-04-09 00:19:31 -070017900 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017901 TEST_REQUIRES_X86_AVX;
17902 for (uint32_t n = 9; n < 16; n++) {
17903 for (size_t k = 1; k <= 5; k += 2) {
17904 GemmMicrokernelTester()
17905 .mr(6)
17906 .nr(8)
17907 .kr(1)
17908 .sr(1)
17909 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017910 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017911 .k(k)
17912 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017913 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017914 }
17915 }
17916 }
17917
Marat Dukhande06f492020-04-09 00:19:31 -070017918 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017919 TEST_REQUIRES_X86_AVX;
17920 for (uint32_t n = 9; n < 16; n++) {
17921 for (size_t k = 1; k <= 5; k += 2) {
17922 for (uint32_t m = 1; m <= 6; m++) {
17923 GemmMicrokernelTester()
17924 .mr(6)
17925 .nr(8)
17926 .kr(1)
17927 .sr(1)
17928 .m(m)
17929 .n(n)
17930 .k(k)
17931 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017932 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017933 }
17934 }
17935 }
17936 }
17937
Marat Dukhande06f492020-04-09 00:19:31 -070017938 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017939 TEST_REQUIRES_X86_AVX;
17940 for (uint32_t n = 16; n <= 24; n += 8) {
17941 for (size_t k = 1; k <= 5; k += 2) {
17942 GemmMicrokernelTester()
17943 .mr(6)
17944 .nr(8)
17945 .kr(1)
17946 .sr(1)
17947 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017948 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070017949 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017950 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017951 }
17952 }
17953 }
17954
Marat Dukhande06f492020-04-09 00:19:31 -070017955 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017956 TEST_REQUIRES_X86_AVX;
17957 for (uint32_t n = 16; n <= 24; n += 8) {
17958 for (size_t k = 1; k <= 5; k += 2) {
17959 GemmMicrokernelTester()
17960 .mr(6)
17961 .nr(8)
17962 .kr(1)
17963 .sr(1)
17964 .m(6)
17965 .n(n)
17966 .k(k)
17967 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017968 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017969 }
17970 }
17971 }
17972
Marat Dukhande06f492020-04-09 00:19:31 -070017973 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017974 TEST_REQUIRES_X86_AVX;
17975 for (uint32_t n = 16; n <= 24; n += 8) {
17976 for (size_t k = 1; k <= 5; k += 2) {
17977 for (uint32_t m = 1; m <= 6; m++) {
17978 GemmMicrokernelTester()
17979 .mr(6)
17980 .nr(8)
17981 .kr(1)
17982 .sr(1)
17983 .m(m)
17984 .n(n)
17985 .k(k)
17986 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070017987 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070017988 }
17989 }
17990 }
17991 }
17992
Marat Dukhande06f492020-04-09 00:19:31 -070017993 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070017994 TEST_REQUIRES_X86_AVX;
17995 for (size_t k = 1; k <= 5; k += 2) {
17996 GemmMicrokernelTester()
17997 .mr(6)
17998 .nr(8)
17999 .kr(1)
18000 .sr(1)
18001 .m(6)
18002 .n(8)
18003 .k(k)
18004 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018005 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018006 }
18007 }
18008
Marat Dukhande06f492020-04-09 00:19:31 -070018009 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018010 TEST_REQUIRES_X86_AVX;
18011 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018012 for (uint32_t n = 1; n <= 8; n++) {
18013 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018014 GemmMicrokernelTester()
18015 .mr(6)
18016 .nr(8)
18017 .kr(1)
18018 .sr(1)
18019 .m(m)
18020 .n(n)
18021 .k(k)
18022 .ks(3)
18023 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018024 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018025 }
18026 }
18027 }
18028 }
18029
Marat Dukhande06f492020-04-09 00:19:31 -070018030 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018031 TEST_REQUIRES_X86_AVX;
18032 for (uint32_t n = 9; n < 16; n++) {
18033 for (size_t k = 1; k <= 5; k += 2) {
18034 GemmMicrokernelTester()
18035 .mr(6)
18036 .nr(8)
18037 .kr(1)
18038 .sr(1)
18039 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018040 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018041 .k(k)
18042 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018043 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018044 }
18045 }
18046 }
18047
Marat Dukhande06f492020-04-09 00:19:31 -070018048 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018049 TEST_REQUIRES_X86_AVX;
18050 for (uint32_t n = 16; n <= 24; n += 8) {
18051 for (size_t k = 1; k <= 5; k += 2) {
18052 GemmMicrokernelTester()
18053 .mr(6)
18054 .nr(8)
18055 .kr(1)
18056 .sr(1)
18057 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018058 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018059 .k(k)
18060 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018061 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018062 }
18063 }
18064 }
18065
Marat Dukhande06f492020-04-09 00:19:31 -070018066 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018067 TEST_REQUIRES_X86_AVX;
18068 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018069 for (uint32_t n = 1; n <= 8; n++) {
18070 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018071 GemmMicrokernelTester()
18072 .mr(6)
18073 .nr(8)
18074 .kr(1)
18075 .sr(1)
18076 .m(m)
18077 .n(n)
18078 .k(k)
18079 .cm_stride(11)
18080 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018081 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018082 }
18083 }
18084 }
18085 }
18086
Marat Dukhande06f492020-04-09 00:19:31 -070018087 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018088 TEST_REQUIRES_X86_AVX;
18089 for (size_t k = 1; k <= 5; k += 2) {
18090 GemmMicrokernelTester()
18091 .mr(6)
18092 .nr(8)
18093 .kr(1)
18094 .sr(1)
18095 .m(6)
18096 .n(8)
18097 .k(k)
18098 .ks(3)
18099 .a_offset(37)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018100 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018101 }
18102 }
18103
Marat Dukhande06f492020-04-09 00:19:31 -070018104 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018105 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018106 for (size_t k = 1; k <= 5; k += 2) {
18107 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018108 GemmMicrokernelTester()
18109 .mr(6)
18110 .nr(8)
18111 .kr(1)
18112 .sr(1)
18113 .m(6)
18114 .n(8)
18115 .k(k)
18116 .ks(3)
18117 .a_offset(37)
18118 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018119 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018120 }
18121 }
18122 }
18123
Marat Dukhande06f492020-04-09 00:19:31 -070018124 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018125 TEST_REQUIRES_X86_AVX;
18126 GemmMicrokernelTester()
18127 .mr(6)
18128 .nr(8)
18129 .kr(1)
18130 .sr(1)
18131 .m(6)
18132 .n(8)
18133 .k(1)
18134 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018135 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018136 }
18137
Marat Dukhande06f492020-04-09 00:19:31 -070018138 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018139 TEST_REQUIRES_X86_AVX;
18140 GemmMicrokernelTester()
18141 .mr(6)
18142 .nr(8)
18143 .kr(1)
18144 .sr(1)
18145 .m(6)
18146 .n(8)
18147 .k(1)
18148 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018149 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018150 }
18151
Marat Dukhande06f492020-04-09 00:19:31 -070018152 TEST(F32_IGEMM_MINMAX_6X8__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018153 TEST_REQUIRES_X86_AVX;
18154 GemmMicrokernelTester()
18155 .mr(6)
18156 .nr(8)
18157 .kr(1)
18158 .sr(1)
18159 .m(6)
18160 .n(8)
18161 .k(1)
18162 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018163 .Test(xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018164 }
18165#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18166
18167
18168#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070018169 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018170 TEST_REQUIRES_X86_AVX;
18171 GemmMicrokernelTester()
18172 .mr(1)
18173 .nr(16)
18174 .kr(1)
18175 .sr(1)
18176 .m(1)
18177 .n(16)
18178 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018179 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018180 }
18181
Marat Dukhande06f492020-04-09 00:19:31 -070018182 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018183 TEST_REQUIRES_X86_AVX;
18184 GemmMicrokernelTester()
18185 .mr(1)
18186 .nr(16)
18187 .kr(1)
18188 .sr(1)
18189 .m(1)
18190 .n(16)
18191 .k(1)
18192 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018193 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018194 }
18195
Marat Dukhande06f492020-04-09 00:19:31 -070018196 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018197 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018198 for (uint32_t n = 1; n <= 16; n++) {
18199 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018200 GemmMicrokernelTester()
18201 .mr(1)
18202 .nr(16)
18203 .kr(1)
18204 .sr(1)
18205 .m(m)
18206 .n(n)
18207 .k(1)
18208 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018209 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018210 }
18211 }
18212 }
18213
Marat Dukhande06f492020-04-09 00:19:31 -070018214 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018215 TEST_REQUIRES_X86_AVX;
18216 for (uint32_t m = 1; m <= 1; m++) {
18217 GemmMicrokernelTester()
18218 .mr(1)
18219 .nr(16)
18220 .kr(1)
18221 .sr(1)
18222 .m(m)
18223 .n(16)
18224 .k(1)
18225 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018226 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018227 }
18228 }
18229
Marat Dukhande06f492020-04-09 00:19:31 -070018230 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018231 TEST_REQUIRES_X86_AVX;
18232 for (uint32_t n = 1; n <= 16; n++) {
18233 GemmMicrokernelTester()
18234 .mr(1)
18235 .nr(16)
18236 .kr(1)
18237 .sr(1)
18238 .m(1)
18239 .n(n)
18240 .k(1)
18241 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018242 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018243 }
18244 }
18245
Marat Dukhande06f492020-04-09 00:19:31 -070018246 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018247 TEST_REQUIRES_X86_AVX;
18248 for (size_t k = 2; k < 10; k++) {
18249 GemmMicrokernelTester()
18250 .mr(1)
18251 .nr(16)
18252 .kr(1)
18253 .sr(1)
18254 .m(1)
18255 .n(16)
18256 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018257 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018258 }
18259 }
18260
Marat Dukhande06f492020-04-09 00:19:31 -070018261 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018262 TEST_REQUIRES_X86_AVX;
18263 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018264 for (uint32_t n = 1; n <= 16; n++) {
18265 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018266 GemmMicrokernelTester()
18267 .mr(1)
18268 .nr(16)
18269 .kr(1)
18270 .sr(1)
18271 .m(m)
18272 .n(n)
18273 .k(k)
18274 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018275 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018276 }
18277 }
18278 }
18279 }
18280
Marat Dukhande06f492020-04-09 00:19:31 -070018281 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018282 TEST_REQUIRES_X86_AVX;
18283 for (uint32_t n = 17; n < 32; n++) {
18284 for (size_t k = 1; k <= 5; k += 2) {
18285 GemmMicrokernelTester()
18286 .mr(1)
18287 .nr(16)
18288 .kr(1)
18289 .sr(1)
18290 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018291 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018292 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018293 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018294 }
18295 }
18296 }
18297
Marat Dukhande06f492020-04-09 00:19:31 -070018298 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018299 TEST_REQUIRES_X86_AVX;
18300 for (uint32_t n = 17; n < 32; n++) {
18301 for (size_t k = 1; k <= 5; k += 2) {
18302 GemmMicrokernelTester()
18303 .mr(1)
18304 .nr(16)
18305 .kr(1)
18306 .sr(1)
18307 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018308 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018309 .k(k)
18310 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018311 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018312 }
18313 }
18314 }
18315
Marat Dukhande06f492020-04-09 00:19:31 -070018316 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018317 TEST_REQUIRES_X86_AVX;
18318 for (uint32_t n = 17; n < 32; n++) {
18319 for (size_t k = 1; k <= 5; k += 2) {
18320 for (uint32_t m = 1; m <= 1; m++) {
18321 GemmMicrokernelTester()
18322 .mr(1)
18323 .nr(16)
18324 .kr(1)
18325 .sr(1)
18326 .m(m)
18327 .n(n)
18328 .k(k)
18329 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018330 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018331 }
18332 }
18333 }
18334 }
18335
Marat Dukhande06f492020-04-09 00:19:31 -070018336 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018337 TEST_REQUIRES_X86_AVX;
18338 for (uint32_t n = 32; n <= 48; n += 16) {
18339 for (size_t k = 1; k <= 5; k += 2) {
18340 GemmMicrokernelTester()
18341 .mr(1)
18342 .nr(16)
18343 .kr(1)
18344 .sr(1)
18345 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018346 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018347 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018348 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018349 }
18350 }
18351 }
18352
Marat Dukhande06f492020-04-09 00:19:31 -070018353 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018354 TEST_REQUIRES_X86_AVX;
18355 for (uint32_t n = 32; n <= 48; n += 16) {
18356 for (size_t k = 1; k <= 5; k += 2) {
18357 GemmMicrokernelTester()
18358 .mr(1)
18359 .nr(16)
18360 .kr(1)
18361 .sr(1)
18362 .m(1)
18363 .n(n)
18364 .k(k)
18365 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018366 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018367 }
18368 }
18369 }
18370
Marat Dukhande06f492020-04-09 00:19:31 -070018371 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018372 TEST_REQUIRES_X86_AVX;
18373 for (uint32_t n = 32; n <= 48; n += 16) {
18374 for (size_t k = 1; k <= 5; k += 2) {
18375 for (uint32_t m = 1; m <= 1; m++) {
18376 GemmMicrokernelTester()
18377 .mr(1)
18378 .nr(16)
18379 .kr(1)
18380 .sr(1)
18381 .m(m)
18382 .n(n)
18383 .k(k)
18384 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018385 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018386 }
18387 }
18388 }
18389 }
18390
Marat Dukhande06f492020-04-09 00:19:31 -070018391 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018392 TEST_REQUIRES_X86_AVX;
18393 for (size_t k = 1; k <= 5; k += 2) {
18394 GemmMicrokernelTester()
18395 .mr(1)
18396 .nr(16)
18397 .kr(1)
18398 .sr(1)
18399 .m(1)
18400 .n(16)
18401 .k(k)
18402 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018403 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018404 }
18405 }
18406
Marat Dukhande06f492020-04-09 00:19:31 -070018407 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018408 TEST_REQUIRES_X86_AVX;
18409 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018410 for (uint32_t n = 1; n <= 16; n++) {
18411 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018412 GemmMicrokernelTester()
18413 .mr(1)
18414 .nr(16)
18415 .kr(1)
18416 .sr(1)
18417 .m(m)
18418 .n(n)
18419 .k(k)
18420 .ks(3)
18421 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018422 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018423 }
18424 }
18425 }
18426 }
18427
Marat Dukhande06f492020-04-09 00:19:31 -070018428 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_gt_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018429 TEST_REQUIRES_X86_AVX;
18430 for (uint32_t n = 17; n < 32; n++) {
18431 for (size_t k = 1; k <= 5; k += 2) {
18432 GemmMicrokernelTester()
18433 .mr(1)
18434 .nr(16)
18435 .kr(1)
18436 .sr(1)
18437 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018438 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018439 .k(k)
18440 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018441 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018442 }
18443 }
18444 }
18445
Marat Dukhande06f492020-04-09 00:19:31 -070018446 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, n_div_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018447 TEST_REQUIRES_X86_AVX;
18448 for (uint32_t n = 32; n <= 48; n += 16) {
18449 for (size_t k = 1; k <= 5; k += 2) {
18450 GemmMicrokernelTester()
18451 .mr(1)
18452 .nr(16)
18453 .kr(1)
18454 .sr(1)
18455 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018456 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018457 .k(k)
18458 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018459 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018460 }
18461 }
18462 }
18463
Marat Dukhande06f492020-04-09 00:19:31 -070018464 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018465 TEST_REQUIRES_X86_AVX;
18466 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018467 for (uint32_t n = 1; n <= 16; n++) {
18468 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018469 GemmMicrokernelTester()
18470 .mr(1)
18471 .nr(16)
18472 .kr(1)
18473 .sr(1)
18474 .m(m)
18475 .n(n)
18476 .k(k)
18477 .cm_stride(19)
18478 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018479 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018480 }
18481 }
18482 }
18483 }
18484
Marat Dukhande06f492020-04-09 00:19:31 -070018485 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018486 TEST_REQUIRES_X86_AVX;
18487 for (size_t k = 1; k <= 5; k += 2) {
18488 GemmMicrokernelTester()
18489 .mr(1)
18490 .nr(16)
18491 .kr(1)
18492 .sr(1)
18493 .m(1)
18494 .n(16)
18495 .k(k)
18496 .ks(3)
18497 .a_offset(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018498 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018499 }
18500 }
18501
Marat Dukhande06f492020-04-09 00:19:31 -070018502 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018503 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018504 for (size_t k = 1; k <= 5; k += 2) {
18505 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018506 GemmMicrokernelTester()
18507 .mr(1)
18508 .nr(16)
18509 .kr(1)
18510 .sr(1)
18511 .m(1)
18512 .n(16)
18513 .k(k)
18514 .ks(3)
18515 .a_offset(7)
18516 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018517 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018518 }
18519 }
18520 }
18521
Marat Dukhande06f492020-04-09 00:19:31 -070018522 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018523 TEST_REQUIRES_X86_AVX;
18524 GemmMicrokernelTester()
18525 .mr(1)
18526 .nr(16)
18527 .kr(1)
18528 .sr(1)
18529 .m(1)
18530 .n(16)
18531 .k(1)
18532 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018533 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018534 }
18535
Marat Dukhande06f492020-04-09 00:19:31 -070018536 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018537 TEST_REQUIRES_X86_AVX;
18538 GemmMicrokernelTester()
18539 .mr(1)
18540 .nr(16)
18541 .kr(1)
18542 .sr(1)
18543 .m(1)
18544 .n(16)
18545 .k(1)
18546 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018547 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018548 }
18549
Marat Dukhande06f492020-04-09 00:19:31 -070018550 TEST(F32_IGEMM_MINMAX_1X16__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018551 TEST_REQUIRES_X86_AVX;
18552 GemmMicrokernelTester()
18553 .mr(1)
18554 .nr(16)
18555 .kr(1)
18556 .sr(1)
18557 .m(1)
18558 .n(16)
18559 .k(1)
18560 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018561 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018562 }
18563#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18564
18565
18566#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070018567 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018568 TEST_REQUIRES_X86_AVX;
18569 GemmMicrokernelTester()
18570 .mr(4)
18571 .nr(16)
18572 .kr(1)
18573 .sr(1)
18574 .m(4)
18575 .n(16)
18576 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018577 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018578 }
18579
Marat Dukhande06f492020-04-09 00:19:31 -070018580 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018581 TEST_REQUIRES_X86_AVX;
18582 GemmMicrokernelTester()
18583 .mr(4)
18584 .nr(16)
18585 .kr(1)
18586 .sr(1)
18587 .m(4)
18588 .n(16)
18589 .k(1)
18590 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018591 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018592 }
18593
Marat Dukhande06f492020-04-09 00:19:31 -070018594 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018595 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018596 for (uint32_t n = 1; n <= 16; n++) {
18597 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018598 GemmMicrokernelTester()
18599 .mr(4)
18600 .nr(16)
18601 .kr(1)
18602 .sr(1)
18603 .m(m)
18604 .n(n)
18605 .k(1)
18606 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018607 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018608 }
18609 }
18610 }
18611
Marat Dukhande06f492020-04-09 00:19:31 -070018612 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018613 TEST_REQUIRES_X86_AVX;
18614 for (uint32_t m = 1; m <= 4; m++) {
18615 GemmMicrokernelTester()
18616 .mr(4)
18617 .nr(16)
18618 .kr(1)
18619 .sr(1)
18620 .m(m)
18621 .n(16)
18622 .k(1)
18623 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018624 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018625 }
18626 }
18627
Marat Dukhande06f492020-04-09 00:19:31 -070018628 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018629 TEST_REQUIRES_X86_AVX;
18630 for (uint32_t n = 1; n <= 16; n++) {
18631 GemmMicrokernelTester()
18632 .mr(4)
18633 .nr(16)
18634 .kr(1)
18635 .sr(1)
18636 .m(4)
18637 .n(n)
18638 .k(1)
18639 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018640 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018641 }
18642 }
18643
Marat Dukhande06f492020-04-09 00:19:31 -070018644 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018645 TEST_REQUIRES_X86_AVX;
18646 for (size_t k = 2; k < 10; k++) {
18647 GemmMicrokernelTester()
18648 .mr(4)
18649 .nr(16)
18650 .kr(1)
18651 .sr(1)
18652 .m(4)
18653 .n(16)
18654 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018655 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018656 }
18657 }
18658
Marat Dukhande06f492020-04-09 00:19:31 -070018659 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018660 TEST_REQUIRES_X86_AVX;
18661 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018662 for (uint32_t n = 1; n <= 16; n++) {
18663 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018664 GemmMicrokernelTester()
18665 .mr(4)
18666 .nr(16)
18667 .kr(1)
18668 .sr(1)
18669 .m(m)
18670 .n(n)
18671 .k(k)
18672 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018673 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018674 }
18675 }
18676 }
18677 }
18678
Marat Dukhande06f492020-04-09 00:19:31 -070018679 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018680 TEST_REQUIRES_X86_AVX;
18681 for (uint32_t n = 17; n < 32; n++) {
18682 for (size_t k = 1; k <= 5; k += 2) {
18683 GemmMicrokernelTester()
18684 .mr(4)
18685 .nr(16)
18686 .kr(1)
18687 .sr(1)
18688 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018689 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018690 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018691 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018692 }
18693 }
18694 }
18695
Marat Dukhande06f492020-04-09 00:19:31 -070018696 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018697 TEST_REQUIRES_X86_AVX;
18698 for (uint32_t n = 17; n < 32; n++) {
18699 for (size_t k = 1; k <= 5; k += 2) {
18700 GemmMicrokernelTester()
18701 .mr(4)
18702 .nr(16)
18703 .kr(1)
18704 .sr(1)
18705 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018706 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018707 .k(k)
18708 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018709 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018710 }
18711 }
18712 }
18713
Marat Dukhande06f492020-04-09 00:19:31 -070018714 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018715 TEST_REQUIRES_X86_AVX;
18716 for (uint32_t n = 17; n < 32; n++) {
18717 for (size_t k = 1; k <= 5; k += 2) {
18718 for (uint32_t m = 1; m <= 4; m++) {
18719 GemmMicrokernelTester()
18720 .mr(4)
18721 .nr(16)
18722 .kr(1)
18723 .sr(1)
18724 .m(m)
18725 .n(n)
18726 .k(k)
18727 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018728 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018729 }
18730 }
18731 }
18732 }
18733
Marat Dukhande06f492020-04-09 00:19:31 -070018734 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018735 TEST_REQUIRES_X86_AVX;
18736 for (uint32_t n = 32; n <= 48; n += 16) {
18737 for (size_t k = 1; k <= 5; k += 2) {
18738 GemmMicrokernelTester()
18739 .mr(4)
18740 .nr(16)
18741 .kr(1)
18742 .sr(1)
18743 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018744 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018745 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018746 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018747 }
18748 }
18749 }
18750
Marat Dukhande06f492020-04-09 00:19:31 -070018751 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018752 TEST_REQUIRES_X86_AVX;
18753 for (uint32_t n = 32; n <= 48; n += 16) {
18754 for (size_t k = 1; k <= 5; k += 2) {
18755 GemmMicrokernelTester()
18756 .mr(4)
18757 .nr(16)
18758 .kr(1)
18759 .sr(1)
18760 .m(4)
18761 .n(n)
18762 .k(k)
18763 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018764 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018765 }
18766 }
18767 }
18768
Marat Dukhande06f492020-04-09 00:19:31 -070018769 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018770 TEST_REQUIRES_X86_AVX;
18771 for (uint32_t n = 32; n <= 48; n += 16) {
18772 for (size_t k = 1; k <= 5; k += 2) {
18773 for (uint32_t m = 1; m <= 4; m++) {
18774 GemmMicrokernelTester()
18775 .mr(4)
18776 .nr(16)
18777 .kr(1)
18778 .sr(1)
18779 .m(m)
18780 .n(n)
18781 .k(k)
18782 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018783 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018784 }
18785 }
18786 }
18787 }
18788
Marat Dukhande06f492020-04-09 00:19:31 -070018789 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018790 TEST_REQUIRES_X86_AVX;
18791 for (size_t k = 1; k <= 5; k += 2) {
18792 GemmMicrokernelTester()
18793 .mr(4)
18794 .nr(16)
18795 .kr(1)
18796 .sr(1)
18797 .m(4)
18798 .n(16)
18799 .k(k)
18800 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018801 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018802 }
18803 }
18804
Marat Dukhande06f492020-04-09 00:19:31 -070018805 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018806 TEST_REQUIRES_X86_AVX;
18807 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018808 for (uint32_t n = 1; n <= 16; n++) {
18809 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018810 GemmMicrokernelTester()
18811 .mr(4)
18812 .nr(16)
18813 .kr(1)
18814 .sr(1)
18815 .m(m)
18816 .n(n)
18817 .k(k)
18818 .ks(3)
18819 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018820 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018821 }
18822 }
18823 }
18824 }
18825
Marat Dukhande06f492020-04-09 00:19:31 -070018826 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_gt_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018827 TEST_REQUIRES_X86_AVX;
18828 for (uint32_t n = 17; n < 32; n++) {
18829 for (size_t k = 1; k <= 5; k += 2) {
18830 GemmMicrokernelTester()
18831 .mr(4)
18832 .nr(16)
18833 .kr(1)
18834 .sr(1)
18835 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018836 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018837 .k(k)
18838 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018839 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018840 }
18841 }
18842 }
18843
Marat Dukhande06f492020-04-09 00:19:31 -070018844 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, n_div_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018845 TEST_REQUIRES_X86_AVX;
18846 for (uint32_t n = 32; n <= 48; n += 16) {
18847 for (size_t k = 1; k <= 5; k += 2) {
18848 GemmMicrokernelTester()
18849 .mr(4)
18850 .nr(16)
18851 .kr(1)
18852 .sr(1)
18853 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018854 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070018855 .k(k)
18856 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018857 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018858 }
18859 }
18860 }
18861
Marat Dukhande06f492020-04-09 00:19:31 -070018862 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018863 TEST_REQUIRES_X86_AVX;
18864 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018865 for (uint32_t n = 1; n <= 16; n++) {
18866 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018867 GemmMicrokernelTester()
18868 .mr(4)
18869 .nr(16)
18870 .kr(1)
18871 .sr(1)
18872 .m(m)
18873 .n(n)
18874 .k(k)
18875 .cm_stride(19)
18876 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018877 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018878 }
18879 }
18880 }
18881 }
18882
Marat Dukhande06f492020-04-09 00:19:31 -070018883 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018884 TEST_REQUIRES_X86_AVX;
18885 for (size_t k = 1; k <= 5; k += 2) {
18886 GemmMicrokernelTester()
18887 .mr(4)
18888 .nr(16)
18889 .kr(1)
18890 .sr(1)
18891 .m(4)
18892 .n(16)
18893 .k(k)
18894 .ks(3)
18895 .a_offset(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018896 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018897 }
18898 }
18899
Marat Dukhande06f492020-04-09 00:19:31 -070018900 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018901 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018902 for (size_t k = 1; k <= 5; k += 2) {
18903 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018904 GemmMicrokernelTester()
18905 .mr(4)
18906 .nr(16)
18907 .kr(1)
18908 .sr(1)
18909 .m(4)
18910 .n(16)
18911 .k(k)
18912 .ks(3)
18913 .a_offset(23)
18914 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018915 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018916 }
18917 }
18918 }
18919
Marat Dukhande06f492020-04-09 00:19:31 -070018920 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018921 TEST_REQUIRES_X86_AVX;
18922 GemmMicrokernelTester()
18923 .mr(4)
18924 .nr(16)
18925 .kr(1)
18926 .sr(1)
18927 .m(4)
18928 .n(16)
18929 .k(1)
18930 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018931 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018932 }
18933
Marat Dukhande06f492020-04-09 00:19:31 -070018934 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018935 TEST_REQUIRES_X86_AVX;
18936 GemmMicrokernelTester()
18937 .mr(4)
18938 .nr(16)
18939 .kr(1)
18940 .sr(1)
18941 .m(4)
18942 .n(16)
18943 .k(1)
18944 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018945 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018946 }
18947
Marat Dukhande06f492020-04-09 00:19:31 -070018948 TEST(F32_IGEMM_MINMAX_4X16__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018949 TEST_REQUIRES_X86_AVX;
18950 GemmMicrokernelTester()
18951 .mr(4)
18952 .nr(16)
18953 .kr(1)
18954 .sr(1)
18955 .m(4)
18956 .n(16)
18957 .k(1)
18958 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018959 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018960 }
18961#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18962
18963
18964#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070018965 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018966 TEST_REQUIRES_X86_AVX;
18967 GemmMicrokernelTester()
18968 .mr(5)
18969 .nr(16)
18970 .kr(1)
18971 .sr(1)
18972 .m(5)
18973 .n(16)
18974 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018975 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018976 }
18977
Marat Dukhande06f492020-04-09 00:19:31 -070018978 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018979 TEST_REQUIRES_X86_AVX;
18980 GemmMicrokernelTester()
18981 .mr(5)
18982 .nr(16)
18983 .kr(1)
18984 .sr(1)
18985 .m(5)
18986 .n(16)
18987 .k(1)
18988 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070018989 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070018990 }
18991
Marat Dukhande06f492020-04-09 00:19:31 -070018992 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018993 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018994 for (uint32_t n = 1; n <= 16; n++) {
18995 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070018996 GemmMicrokernelTester()
18997 .mr(5)
18998 .nr(16)
18999 .kr(1)
19000 .sr(1)
19001 .m(m)
19002 .n(n)
19003 .k(1)
19004 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019005 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019006 }
19007 }
19008 }
19009
Marat Dukhande06f492020-04-09 00:19:31 -070019010 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019011 TEST_REQUIRES_X86_AVX;
19012 for (uint32_t m = 1; m <= 5; m++) {
19013 GemmMicrokernelTester()
19014 .mr(5)
19015 .nr(16)
19016 .kr(1)
19017 .sr(1)
19018 .m(m)
19019 .n(16)
19020 .k(1)
19021 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019022 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019023 }
19024 }
19025
Marat Dukhande06f492020-04-09 00:19:31 -070019026 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019027 TEST_REQUIRES_X86_AVX;
19028 for (uint32_t n = 1; n <= 16; n++) {
19029 GemmMicrokernelTester()
19030 .mr(5)
19031 .nr(16)
19032 .kr(1)
19033 .sr(1)
19034 .m(5)
19035 .n(n)
19036 .k(1)
19037 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019038 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019039 }
19040 }
19041
Marat Dukhande06f492020-04-09 00:19:31 -070019042 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019043 TEST_REQUIRES_X86_AVX;
19044 for (size_t k = 2; k < 10; k++) {
19045 GemmMicrokernelTester()
19046 .mr(5)
19047 .nr(16)
19048 .kr(1)
19049 .sr(1)
19050 .m(5)
19051 .n(16)
19052 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019053 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019054 }
19055 }
19056
Marat Dukhande06f492020-04-09 00:19:31 -070019057 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019058 TEST_REQUIRES_X86_AVX;
19059 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019060 for (uint32_t n = 1; n <= 16; n++) {
19061 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019062 GemmMicrokernelTester()
19063 .mr(5)
19064 .nr(16)
19065 .kr(1)
19066 .sr(1)
19067 .m(m)
19068 .n(n)
19069 .k(k)
19070 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019071 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019072 }
19073 }
19074 }
19075 }
19076
Marat Dukhande06f492020-04-09 00:19:31 -070019077 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019078 TEST_REQUIRES_X86_AVX;
19079 for (uint32_t n = 17; n < 32; n++) {
19080 for (size_t k = 1; k <= 5; k += 2) {
19081 GemmMicrokernelTester()
19082 .mr(5)
19083 .nr(16)
19084 .kr(1)
19085 .sr(1)
19086 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019087 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019088 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019089 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019090 }
19091 }
19092 }
19093
Marat Dukhande06f492020-04-09 00:19:31 -070019094 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019095 TEST_REQUIRES_X86_AVX;
19096 for (uint32_t n = 17; n < 32; n++) {
19097 for (size_t k = 1; k <= 5; k += 2) {
19098 GemmMicrokernelTester()
19099 .mr(5)
19100 .nr(16)
19101 .kr(1)
19102 .sr(1)
19103 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019104 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019105 .k(k)
19106 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019107 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019108 }
19109 }
19110 }
19111
Marat Dukhande06f492020-04-09 00:19:31 -070019112 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019113 TEST_REQUIRES_X86_AVX;
19114 for (uint32_t n = 17; n < 32; n++) {
19115 for (size_t k = 1; k <= 5; k += 2) {
19116 for (uint32_t m = 1; m <= 5; m++) {
19117 GemmMicrokernelTester()
19118 .mr(5)
19119 .nr(16)
19120 .kr(1)
19121 .sr(1)
19122 .m(m)
19123 .n(n)
19124 .k(k)
19125 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019126 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019127 }
19128 }
19129 }
19130 }
19131
Marat Dukhande06f492020-04-09 00:19:31 -070019132 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019133 TEST_REQUIRES_X86_AVX;
19134 for (uint32_t n = 32; n <= 48; n += 16) {
19135 for (size_t k = 1; k <= 5; k += 2) {
19136 GemmMicrokernelTester()
19137 .mr(5)
19138 .nr(16)
19139 .kr(1)
19140 .sr(1)
19141 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019142 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019143 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019144 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019145 }
19146 }
19147 }
19148
Marat Dukhande06f492020-04-09 00:19:31 -070019149 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019150 TEST_REQUIRES_X86_AVX;
19151 for (uint32_t n = 32; n <= 48; n += 16) {
19152 for (size_t k = 1; k <= 5; k += 2) {
19153 GemmMicrokernelTester()
19154 .mr(5)
19155 .nr(16)
19156 .kr(1)
19157 .sr(1)
19158 .m(5)
19159 .n(n)
19160 .k(k)
19161 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019162 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019163 }
19164 }
19165 }
19166
Marat Dukhande06f492020-04-09 00:19:31 -070019167 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019168 TEST_REQUIRES_X86_AVX;
19169 for (uint32_t n = 32; n <= 48; n += 16) {
19170 for (size_t k = 1; k <= 5; k += 2) {
19171 for (uint32_t m = 1; m <= 5; m++) {
19172 GemmMicrokernelTester()
19173 .mr(5)
19174 .nr(16)
19175 .kr(1)
19176 .sr(1)
19177 .m(m)
19178 .n(n)
19179 .k(k)
19180 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019181 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019182 }
19183 }
19184 }
19185 }
19186
Marat Dukhande06f492020-04-09 00:19:31 -070019187 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019188 TEST_REQUIRES_X86_AVX;
19189 for (size_t k = 1; k <= 5; k += 2) {
19190 GemmMicrokernelTester()
19191 .mr(5)
19192 .nr(16)
19193 .kr(1)
19194 .sr(1)
19195 .m(5)
19196 .n(16)
19197 .k(k)
19198 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019199 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019200 }
19201 }
19202
Marat Dukhande06f492020-04-09 00:19:31 -070019203 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019204 TEST_REQUIRES_X86_AVX;
19205 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019206 for (uint32_t n = 1; n <= 16; n++) {
19207 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019208 GemmMicrokernelTester()
19209 .mr(5)
19210 .nr(16)
19211 .kr(1)
19212 .sr(1)
19213 .m(m)
19214 .n(n)
19215 .k(k)
19216 .ks(3)
19217 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019218 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019219 }
19220 }
19221 }
19222 }
19223
Marat Dukhande06f492020-04-09 00:19:31 -070019224 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_gt_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019225 TEST_REQUIRES_X86_AVX;
19226 for (uint32_t n = 17; n < 32; n++) {
19227 for (size_t k = 1; k <= 5; k += 2) {
19228 GemmMicrokernelTester()
19229 .mr(5)
19230 .nr(16)
19231 .kr(1)
19232 .sr(1)
19233 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019234 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019235 .k(k)
19236 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019237 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019238 }
19239 }
19240 }
19241
Marat Dukhande06f492020-04-09 00:19:31 -070019242 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, n_div_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019243 TEST_REQUIRES_X86_AVX;
19244 for (uint32_t n = 32; n <= 48; n += 16) {
19245 for (size_t k = 1; k <= 5; k += 2) {
19246 GemmMicrokernelTester()
19247 .mr(5)
19248 .nr(16)
19249 .kr(1)
19250 .sr(1)
19251 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019252 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019253 .k(k)
19254 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019255 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019256 }
19257 }
19258 }
19259
Marat Dukhande06f492020-04-09 00:19:31 -070019260 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019261 TEST_REQUIRES_X86_AVX;
19262 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019263 for (uint32_t n = 1; n <= 16; n++) {
19264 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019265 GemmMicrokernelTester()
19266 .mr(5)
19267 .nr(16)
19268 .kr(1)
19269 .sr(1)
19270 .m(m)
19271 .n(n)
19272 .k(k)
19273 .cm_stride(19)
19274 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019275 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019276 }
19277 }
19278 }
19279 }
19280
Marat Dukhande06f492020-04-09 00:19:31 -070019281 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019282 TEST_REQUIRES_X86_AVX;
19283 for (size_t k = 1; k <= 5; k += 2) {
19284 GemmMicrokernelTester()
19285 .mr(5)
19286 .nr(16)
19287 .kr(1)
19288 .sr(1)
19289 .m(5)
19290 .n(16)
19291 .k(k)
19292 .ks(3)
19293 .a_offset(29)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019294 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019295 }
19296 }
19297
Marat Dukhande06f492020-04-09 00:19:31 -070019298 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019299 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019300 for (size_t k = 1; k <= 5; k += 2) {
19301 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019302 GemmMicrokernelTester()
19303 .mr(5)
19304 .nr(16)
19305 .kr(1)
19306 .sr(1)
19307 .m(5)
19308 .n(16)
19309 .k(k)
19310 .ks(3)
19311 .a_offset(29)
19312 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019313 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019314 }
19315 }
19316 }
19317
Marat Dukhande06f492020-04-09 00:19:31 -070019318 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019319 TEST_REQUIRES_X86_AVX;
19320 GemmMicrokernelTester()
19321 .mr(5)
19322 .nr(16)
19323 .kr(1)
19324 .sr(1)
19325 .m(5)
19326 .n(16)
19327 .k(1)
19328 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019329 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019330 }
19331
Marat Dukhande06f492020-04-09 00:19:31 -070019332 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019333 TEST_REQUIRES_X86_AVX;
19334 GemmMicrokernelTester()
19335 .mr(5)
19336 .nr(16)
19337 .kr(1)
19338 .sr(1)
19339 .m(5)
19340 .n(16)
19341 .k(1)
19342 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019343 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019344 }
19345
Marat Dukhande06f492020-04-09 00:19:31 -070019346 TEST(F32_IGEMM_MINMAX_5X16__AVX_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019347 TEST_REQUIRES_X86_AVX;
19348 GemmMicrokernelTester()
19349 .mr(5)
19350 .nr(16)
19351 .kr(1)
19352 .sr(1)
19353 .m(5)
19354 .n(16)
19355 .k(1)
19356 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019357 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019358 }
19359#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19360
19361
19362#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070019363 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019364 TEST_REQUIRES_X86_FMA3;
19365 GemmMicrokernelTester()
19366 .mr(6)
19367 .nr(8)
19368 .kr(1)
19369 .sr(1)
19370 .m(6)
19371 .n(8)
19372 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019373 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019374 }
19375
Marat Dukhande06f492020-04-09 00:19:31 -070019376 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019377 TEST_REQUIRES_X86_FMA3;
19378 GemmMicrokernelTester()
19379 .mr(6)
19380 .nr(8)
19381 .kr(1)
19382 .sr(1)
19383 .m(6)
19384 .n(8)
19385 .k(1)
19386 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019387 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019388 }
19389
Marat Dukhande06f492020-04-09 00:19:31 -070019390 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019391 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019392 for (uint32_t n = 1; n <= 8; n++) {
19393 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019394 GemmMicrokernelTester()
19395 .mr(6)
19396 .nr(8)
19397 .kr(1)
19398 .sr(1)
19399 .m(m)
19400 .n(n)
19401 .k(1)
19402 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019403 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019404 }
19405 }
19406 }
19407
Marat Dukhande06f492020-04-09 00:19:31 -070019408 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019409 TEST_REQUIRES_X86_FMA3;
19410 for (uint32_t m = 1; m <= 6; m++) {
19411 GemmMicrokernelTester()
19412 .mr(6)
19413 .nr(8)
19414 .kr(1)
19415 .sr(1)
19416 .m(m)
19417 .n(8)
19418 .k(1)
19419 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019420 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019421 }
19422 }
19423
Marat Dukhande06f492020-04-09 00:19:31 -070019424 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019425 TEST_REQUIRES_X86_FMA3;
19426 for (uint32_t n = 1; n <= 8; n++) {
19427 GemmMicrokernelTester()
19428 .mr(6)
19429 .nr(8)
19430 .kr(1)
19431 .sr(1)
19432 .m(6)
19433 .n(n)
19434 .k(1)
19435 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019436 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019437 }
19438 }
19439
Marat Dukhande06f492020-04-09 00:19:31 -070019440 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019441 TEST_REQUIRES_X86_FMA3;
19442 for (size_t k = 2; k < 10; k++) {
19443 GemmMicrokernelTester()
19444 .mr(6)
19445 .nr(8)
19446 .kr(1)
19447 .sr(1)
19448 .m(6)
19449 .n(8)
19450 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019451 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019452 }
19453 }
19454
Marat Dukhande06f492020-04-09 00:19:31 -070019455 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019456 TEST_REQUIRES_X86_FMA3;
19457 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019458 for (uint32_t n = 1; n <= 8; n++) {
19459 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019460 GemmMicrokernelTester()
19461 .mr(6)
19462 .nr(8)
19463 .kr(1)
19464 .sr(1)
19465 .m(m)
19466 .n(n)
19467 .k(k)
19468 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019469 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019470 }
19471 }
19472 }
19473 }
19474
Marat Dukhande06f492020-04-09 00:19:31 -070019475 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019476 TEST_REQUIRES_X86_FMA3;
19477 for (uint32_t n = 9; n < 16; n++) {
19478 for (size_t k = 1; k <= 5; k += 2) {
19479 GemmMicrokernelTester()
19480 .mr(6)
19481 .nr(8)
19482 .kr(1)
19483 .sr(1)
19484 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019485 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019486 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019487 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019488 }
19489 }
19490 }
19491
Marat Dukhande06f492020-04-09 00:19:31 -070019492 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019493 TEST_REQUIRES_X86_FMA3;
19494 for (uint32_t n = 9; n < 16; n++) {
19495 for (size_t k = 1; k <= 5; k += 2) {
19496 GemmMicrokernelTester()
19497 .mr(6)
19498 .nr(8)
19499 .kr(1)
19500 .sr(1)
19501 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019502 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019503 .k(k)
19504 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019505 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019506 }
19507 }
19508 }
19509
Marat Dukhande06f492020-04-09 00:19:31 -070019510 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019511 TEST_REQUIRES_X86_FMA3;
19512 for (uint32_t n = 9; n < 16; n++) {
19513 for (size_t k = 1; k <= 5; k += 2) {
19514 for (uint32_t m = 1; m <= 6; m++) {
19515 GemmMicrokernelTester()
19516 .mr(6)
19517 .nr(8)
19518 .kr(1)
19519 .sr(1)
19520 .m(m)
19521 .n(n)
19522 .k(k)
19523 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019524 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019525 }
19526 }
19527 }
19528 }
19529
Marat Dukhande06f492020-04-09 00:19:31 -070019530 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019531 TEST_REQUIRES_X86_FMA3;
19532 for (uint32_t n = 16; n <= 24; n += 8) {
19533 for (size_t k = 1; k <= 5; k += 2) {
19534 GemmMicrokernelTester()
19535 .mr(6)
19536 .nr(8)
19537 .kr(1)
19538 .sr(1)
19539 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019540 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019541 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019542 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019543 }
19544 }
19545 }
19546
Marat Dukhande06f492020-04-09 00:19:31 -070019547 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019548 TEST_REQUIRES_X86_FMA3;
19549 for (uint32_t n = 16; n <= 24; n += 8) {
19550 for (size_t k = 1; k <= 5; k += 2) {
19551 GemmMicrokernelTester()
19552 .mr(6)
19553 .nr(8)
19554 .kr(1)
19555 .sr(1)
19556 .m(6)
19557 .n(n)
19558 .k(k)
19559 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019560 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019561 }
19562 }
19563 }
19564
Marat Dukhande06f492020-04-09 00:19:31 -070019565 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019566 TEST_REQUIRES_X86_FMA3;
19567 for (uint32_t n = 16; n <= 24; n += 8) {
19568 for (size_t k = 1; k <= 5; k += 2) {
19569 for (uint32_t m = 1; m <= 6; m++) {
19570 GemmMicrokernelTester()
19571 .mr(6)
19572 .nr(8)
19573 .kr(1)
19574 .sr(1)
19575 .m(m)
19576 .n(n)
19577 .k(k)
19578 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019579 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019580 }
19581 }
19582 }
19583 }
19584
Marat Dukhande06f492020-04-09 00:19:31 -070019585 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019586 TEST_REQUIRES_X86_FMA3;
19587 for (size_t k = 1; k <= 5; k += 2) {
19588 GemmMicrokernelTester()
19589 .mr(6)
19590 .nr(8)
19591 .kr(1)
19592 .sr(1)
19593 .m(6)
19594 .n(8)
19595 .k(k)
19596 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019597 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019598 }
19599 }
19600
Marat Dukhande06f492020-04-09 00:19:31 -070019601 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019602 TEST_REQUIRES_X86_FMA3;
19603 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019604 for (uint32_t n = 1; n <= 8; n++) {
19605 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019606 GemmMicrokernelTester()
19607 .mr(6)
19608 .nr(8)
19609 .kr(1)
19610 .sr(1)
19611 .m(m)
19612 .n(n)
19613 .k(k)
19614 .ks(3)
19615 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019616 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019617 }
19618 }
19619 }
19620 }
19621
Marat Dukhande06f492020-04-09 00:19:31 -070019622 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019623 TEST_REQUIRES_X86_FMA3;
19624 for (uint32_t n = 9; n < 16; n++) {
19625 for (size_t k = 1; k <= 5; k += 2) {
19626 GemmMicrokernelTester()
19627 .mr(6)
19628 .nr(8)
19629 .kr(1)
19630 .sr(1)
19631 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019632 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019633 .k(k)
19634 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019635 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019636 }
19637 }
19638 }
19639
Marat Dukhande06f492020-04-09 00:19:31 -070019640 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019641 TEST_REQUIRES_X86_FMA3;
19642 for (uint32_t n = 16; n <= 24; n += 8) {
19643 for (size_t k = 1; k <= 5; k += 2) {
19644 GemmMicrokernelTester()
19645 .mr(6)
19646 .nr(8)
19647 .kr(1)
19648 .sr(1)
19649 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019650 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019651 .k(k)
19652 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019653 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019654 }
19655 }
19656 }
19657
Marat Dukhande06f492020-04-09 00:19:31 -070019658 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019659 TEST_REQUIRES_X86_FMA3;
19660 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019661 for (uint32_t n = 1; n <= 8; n++) {
19662 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019663 GemmMicrokernelTester()
19664 .mr(6)
19665 .nr(8)
19666 .kr(1)
19667 .sr(1)
19668 .m(m)
19669 .n(n)
19670 .k(k)
19671 .cm_stride(11)
19672 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019673 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019674 }
19675 }
19676 }
19677 }
19678
Marat Dukhande06f492020-04-09 00:19:31 -070019679 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019680 TEST_REQUIRES_X86_FMA3;
19681 for (size_t k = 1; k <= 5; k += 2) {
19682 GemmMicrokernelTester()
19683 .mr(6)
19684 .nr(8)
19685 .kr(1)
19686 .sr(1)
19687 .m(6)
19688 .n(8)
19689 .k(k)
19690 .ks(3)
19691 .a_offset(37)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019692 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019693 }
19694 }
19695
Marat Dukhande06f492020-04-09 00:19:31 -070019696 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019697 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019698 for (size_t k = 1; k <= 5; k += 2) {
19699 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019700 GemmMicrokernelTester()
19701 .mr(6)
19702 .nr(8)
19703 .kr(1)
19704 .sr(1)
19705 .m(6)
19706 .n(8)
19707 .k(k)
19708 .ks(3)
19709 .a_offset(37)
19710 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019711 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019712 }
19713 }
19714 }
19715
Marat Dukhande06f492020-04-09 00:19:31 -070019716 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019717 TEST_REQUIRES_X86_FMA3;
19718 GemmMicrokernelTester()
19719 .mr(6)
19720 .nr(8)
19721 .kr(1)
19722 .sr(1)
19723 .m(6)
19724 .n(8)
19725 .k(1)
19726 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019727 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019728 }
19729
Marat Dukhande06f492020-04-09 00:19:31 -070019730 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019731 TEST_REQUIRES_X86_FMA3;
19732 GemmMicrokernelTester()
19733 .mr(6)
19734 .nr(8)
19735 .kr(1)
19736 .sr(1)
19737 .m(6)
19738 .n(8)
19739 .k(1)
19740 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019741 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019742 }
19743
Marat Dukhande06f492020-04-09 00:19:31 -070019744 TEST(F32_IGEMM_MINMAX_6X8__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019745 TEST_REQUIRES_X86_FMA3;
19746 GemmMicrokernelTester()
19747 .mr(6)
19748 .nr(8)
19749 .kr(1)
19750 .sr(1)
19751 .m(6)
19752 .n(8)
19753 .k(1)
19754 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019755 .Test(xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019756 }
19757#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19758
19759
19760#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070019761 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019762 TEST_REQUIRES_X86_FMA3;
19763 GemmMicrokernelTester()
19764 .mr(7)
19765 .nr(8)
19766 .kr(1)
19767 .sr(1)
19768 .m(7)
19769 .n(8)
19770 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019771 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019772 }
19773
Marat Dukhande06f492020-04-09 00:19:31 -070019774 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019775 TEST_REQUIRES_X86_FMA3;
19776 GemmMicrokernelTester()
19777 .mr(7)
19778 .nr(8)
19779 .kr(1)
19780 .sr(1)
19781 .m(7)
19782 .n(8)
19783 .k(1)
19784 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019785 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019786 }
19787
Marat Dukhande06f492020-04-09 00:19:31 -070019788 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019789 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019790 for (uint32_t n = 1; n <= 8; n++) {
19791 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019792 GemmMicrokernelTester()
19793 .mr(7)
19794 .nr(8)
19795 .kr(1)
19796 .sr(1)
19797 .m(m)
19798 .n(n)
19799 .k(1)
19800 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019801 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019802 }
19803 }
19804 }
19805
Marat Dukhande06f492020-04-09 00:19:31 -070019806 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019807 TEST_REQUIRES_X86_FMA3;
19808 for (uint32_t m = 1; m <= 7; m++) {
19809 GemmMicrokernelTester()
19810 .mr(7)
19811 .nr(8)
19812 .kr(1)
19813 .sr(1)
19814 .m(m)
19815 .n(8)
19816 .k(1)
19817 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019818 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019819 }
19820 }
19821
Marat Dukhande06f492020-04-09 00:19:31 -070019822 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019823 TEST_REQUIRES_X86_FMA3;
19824 for (uint32_t n = 1; n <= 8; n++) {
19825 GemmMicrokernelTester()
19826 .mr(7)
19827 .nr(8)
19828 .kr(1)
19829 .sr(1)
19830 .m(7)
19831 .n(n)
19832 .k(1)
19833 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019834 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019835 }
19836 }
19837
Marat Dukhande06f492020-04-09 00:19:31 -070019838 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019839 TEST_REQUIRES_X86_FMA3;
19840 for (size_t k = 2; k < 10; k++) {
19841 GemmMicrokernelTester()
19842 .mr(7)
19843 .nr(8)
19844 .kr(1)
19845 .sr(1)
19846 .m(7)
19847 .n(8)
19848 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019849 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019850 }
19851 }
19852
Marat Dukhande06f492020-04-09 00:19:31 -070019853 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019854 TEST_REQUIRES_X86_FMA3;
19855 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019856 for (uint32_t n = 1; n <= 8; n++) {
19857 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019858 GemmMicrokernelTester()
19859 .mr(7)
19860 .nr(8)
19861 .kr(1)
19862 .sr(1)
19863 .m(m)
19864 .n(n)
19865 .k(k)
19866 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019867 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019868 }
19869 }
19870 }
19871 }
19872
Marat Dukhande06f492020-04-09 00:19:31 -070019873 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019874 TEST_REQUIRES_X86_FMA3;
19875 for (uint32_t n = 9; n < 16; n++) {
19876 for (size_t k = 1; k <= 5; k += 2) {
19877 GemmMicrokernelTester()
19878 .mr(7)
19879 .nr(8)
19880 .kr(1)
19881 .sr(1)
19882 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019883 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019884 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019885 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019886 }
19887 }
19888 }
19889
Marat Dukhande06f492020-04-09 00:19:31 -070019890 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019891 TEST_REQUIRES_X86_FMA3;
19892 for (uint32_t n = 9; n < 16; n++) {
19893 for (size_t k = 1; k <= 5; k += 2) {
19894 GemmMicrokernelTester()
19895 .mr(7)
19896 .nr(8)
19897 .kr(1)
19898 .sr(1)
19899 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019900 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019901 .k(k)
19902 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019903 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019904 }
19905 }
19906 }
19907
Marat Dukhande06f492020-04-09 00:19:31 -070019908 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019909 TEST_REQUIRES_X86_FMA3;
19910 for (uint32_t n = 9; n < 16; n++) {
19911 for (size_t k = 1; k <= 5; k += 2) {
19912 for (uint32_t m = 1; m <= 7; m++) {
19913 GemmMicrokernelTester()
19914 .mr(7)
19915 .nr(8)
19916 .kr(1)
19917 .sr(1)
19918 .m(m)
19919 .n(n)
19920 .k(k)
19921 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019922 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019923 }
19924 }
19925 }
19926 }
19927
Marat Dukhande06f492020-04-09 00:19:31 -070019928 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019929 TEST_REQUIRES_X86_FMA3;
19930 for (uint32_t n = 16; n <= 24; n += 8) {
19931 for (size_t k = 1; k <= 5; k += 2) {
19932 GemmMicrokernelTester()
19933 .mr(7)
19934 .nr(8)
19935 .kr(1)
19936 .sr(1)
19937 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019938 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070019939 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019940 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019941 }
19942 }
19943 }
19944
Marat Dukhande06f492020-04-09 00:19:31 -070019945 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019946 TEST_REQUIRES_X86_FMA3;
19947 for (uint32_t n = 16; n <= 24; n += 8) {
19948 for (size_t k = 1; k <= 5; k += 2) {
19949 GemmMicrokernelTester()
19950 .mr(7)
19951 .nr(8)
19952 .kr(1)
19953 .sr(1)
19954 .m(7)
19955 .n(n)
19956 .k(k)
19957 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019958 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019959 }
19960 }
19961 }
19962
Marat Dukhande06f492020-04-09 00:19:31 -070019963 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019964 TEST_REQUIRES_X86_FMA3;
19965 for (uint32_t n = 16; n <= 24; n += 8) {
19966 for (size_t k = 1; k <= 5; k += 2) {
19967 for (uint32_t m = 1; m <= 7; m++) {
19968 GemmMicrokernelTester()
19969 .mr(7)
19970 .nr(8)
19971 .kr(1)
19972 .sr(1)
19973 .m(m)
19974 .n(n)
19975 .k(k)
19976 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019977 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019978 }
19979 }
19980 }
19981 }
19982
Marat Dukhande06f492020-04-09 00:19:31 -070019983 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070019984 TEST_REQUIRES_X86_FMA3;
19985 for (size_t k = 1; k <= 5; k += 2) {
19986 GemmMicrokernelTester()
19987 .mr(7)
19988 .nr(8)
19989 .kr(1)
19990 .sr(1)
19991 .m(7)
19992 .n(8)
19993 .k(k)
19994 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070019995 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070019996 }
19997 }
19998
Marat Dukhande06f492020-04-09 00:19:31 -070019999 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020000 TEST_REQUIRES_X86_FMA3;
20001 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020002 for (uint32_t n = 1; n <= 8; n++) {
20003 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020004 GemmMicrokernelTester()
20005 .mr(7)
20006 .nr(8)
20007 .kr(1)
20008 .sr(1)
20009 .m(m)
20010 .n(n)
20011 .k(k)
20012 .ks(3)
20013 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020014 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020015 }
20016 }
20017 }
20018 }
20019
Marat Dukhande06f492020-04-09 00:19:31 -070020020 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020021 TEST_REQUIRES_X86_FMA3;
20022 for (uint32_t n = 9; n < 16; n++) {
20023 for (size_t k = 1; k <= 5; k += 2) {
20024 GemmMicrokernelTester()
20025 .mr(7)
20026 .nr(8)
20027 .kr(1)
20028 .sr(1)
20029 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020030 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020031 .k(k)
20032 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020033 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020034 }
20035 }
20036 }
20037
Marat Dukhande06f492020-04-09 00:19:31 -070020038 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020039 TEST_REQUIRES_X86_FMA3;
20040 for (uint32_t n = 16; n <= 24; n += 8) {
20041 for (size_t k = 1; k <= 5; k += 2) {
20042 GemmMicrokernelTester()
20043 .mr(7)
20044 .nr(8)
20045 .kr(1)
20046 .sr(1)
20047 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020048 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020049 .k(k)
20050 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020051 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020052 }
20053 }
20054 }
20055
Marat Dukhande06f492020-04-09 00:19:31 -070020056 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020057 TEST_REQUIRES_X86_FMA3;
20058 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020059 for (uint32_t n = 1; n <= 8; n++) {
20060 for (uint32_t m = 1; m <= 7; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020061 GemmMicrokernelTester()
20062 .mr(7)
20063 .nr(8)
20064 .kr(1)
20065 .sr(1)
20066 .m(m)
20067 .n(n)
20068 .k(k)
20069 .cm_stride(11)
20070 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020071 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020072 }
20073 }
20074 }
20075 }
20076
Marat Dukhande06f492020-04-09 00:19:31 -070020077 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020078 TEST_REQUIRES_X86_FMA3;
20079 for (size_t k = 1; k <= 5; k += 2) {
20080 GemmMicrokernelTester()
20081 .mr(7)
20082 .nr(8)
20083 .kr(1)
20084 .sr(1)
20085 .m(7)
20086 .n(8)
20087 .k(k)
20088 .ks(3)
20089 .a_offset(37)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020090 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020091 }
20092 }
20093
Marat Dukhande06f492020-04-09 00:19:31 -070020094 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020095 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020096 for (size_t k = 1; k <= 5; k += 2) {
20097 for (uint32_t mz = 0; mz < 7; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020098 GemmMicrokernelTester()
20099 .mr(7)
20100 .nr(8)
20101 .kr(1)
20102 .sr(1)
20103 .m(7)
20104 .n(8)
20105 .k(k)
20106 .ks(3)
20107 .a_offset(37)
20108 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020109 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020110 }
20111 }
20112 }
20113
Marat Dukhande06f492020-04-09 00:19:31 -070020114 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020115 TEST_REQUIRES_X86_FMA3;
20116 GemmMicrokernelTester()
20117 .mr(7)
20118 .nr(8)
20119 .kr(1)
20120 .sr(1)
20121 .m(7)
20122 .n(8)
20123 .k(1)
20124 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020125 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020126 }
20127
Marat Dukhande06f492020-04-09 00:19:31 -070020128 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020129 TEST_REQUIRES_X86_FMA3;
20130 GemmMicrokernelTester()
20131 .mr(7)
20132 .nr(8)
20133 .kr(1)
20134 .sr(1)
20135 .m(7)
20136 .n(8)
20137 .k(1)
20138 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020139 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020140 }
20141
Marat Dukhande06f492020-04-09 00:19:31 -070020142 TEST(F32_IGEMM_MINMAX_7X8__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020143 TEST_REQUIRES_X86_FMA3;
20144 GemmMicrokernelTester()
20145 .mr(7)
20146 .nr(8)
20147 .kr(1)
20148 .sr(1)
20149 .m(7)
20150 .n(8)
20151 .k(1)
20152 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020153 .Test(xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020154 }
20155#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20156
20157
20158#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070020159 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020160 TEST_REQUIRES_X86_FMA3;
20161 GemmMicrokernelTester()
20162 .mr(8)
20163 .nr(8)
20164 .kr(1)
20165 .sr(1)
20166 .m(8)
20167 .n(8)
20168 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020169 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020170 }
20171
Marat Dukhande06f492020-04-09 00:19:31 -070020172 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020173 TEST_REQUIRES_X86_FMA3;
20174 GemmMicrokernelTester()
20175 .mr(8)
20176 .nr(8)
20177 .kr(1)
20178 .sr(1)
20179 .m(8)
20180 .n(8)
20181 .k(1)
20182 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020183 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020184 }
20185
Marat Dukhande06f492020-04-09 00:19:31 -070020186 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020187 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020188 for (uint32_t n = 1; n <= 8; n++) {
20189 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020190 GemmMicrokernelTester()
20191 .mr(8)
20192 .nr(8)
20193 .kr(1)
20194 .sr(1)
20195 .m(m)
20196 .n(n)
20197 .k(1)
20198 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020199 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020200 }
20201 }
20202 }
20203
Marat Dukhande06f492020-04-09 00:19:31 -070020204 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020205 TEST_REQUIRES_X86_FMA3;
20206 for (uint32_t m = 1; m <= 8; m++) {
20207 GemmMicrokernelTester()
20208 .mr(8)
20209 .nr(8)
20210 .kr(1)
20211 .sr(1)
20212 .m(m)
20213 .n(8)
20214 .k(1)
20215 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020216 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020217 }
20218 }
20219
Marat Dukhande06f492020-04-09 00:19:31 -070020220 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020221 TEST_REQUIRES_X86_FMA3;
20222 for (uint32_t n = 1; n <= 8; n++) {
20223 GemmMicrokernelTester()
20224 .mr(8)
20225 .nr(8)
20226 .kr(1)
20227 .sr(1)
20228 .m(8)
20229 .n(n)
20230 .k(1)
20231 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020232 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020233 }
20234 }
20235
Marat Dukhande06f492020-04-09 00:19:31 -070020236 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020237 TEST_REQUIRES_X86_FMA3;
20238 for (size_t k = 2; k < 10; k++) {
20239 GemmMicrokernelTester()
20240 .mr(8)
20241 .nr(8)
20242 .kr(1)
20243 .sr(1)
20244 .m(8)
20245 .n(8)
20246 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020247 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020248 }
20249 }
20250
Marat Dukhande06f492020-04-09 00:19:31 -070020251 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020252 TEST_REQUIRES_X86_FMA3;
20253 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020254 for (uint32_t n = 1; n <= 8; n++) {
20255 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020256 GemmMicrokernelTester()
20257 .mr(8)
20258 .nr(8)
20259 .kr(1)
20260 .sr(1)
20261 .m(m)
20262 .n(n)
20263 .k(k)
20264 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020265 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020266 }
20267 }
20268 }
20269 }
20270
Marat Dukhande06f492020-04-09 00:19:31 -070020271 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_gt_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020272 TEST_REQUIRES_X86_FMA3;
20273 for (uint32_t n = 9; n < 16; n++) {
20274 for (size_t k = 1; k <= 5; k += 2) {
20275 GemmMicrokernelTester()
20276 .mr(8)
20277 .nr(8)
20278 .kr(1)
20279 .sr(1)
20280 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020281 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020282 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020283 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020284 }
20285 }
20286 }
20287
Marat Dukhande06f492020-04-09 00:19:31 -070020288 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020289 TEST_REQUIRES_X86_FMA3;
20290 for (uint32_t n = 9; n < 16; n++) {
20291 for (size_t k = 1; k <= 5; k += 2) {
20292 GemmMicrokernelTester()
20293 .mr(8)
20294 .nr(8)
20295 .kr(1)
20296 .sr(1)
20297 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020298 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020299 .k(k)
20300 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020301 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020302 }
20303 }
20304 }
20305
Marat Dukhande06f492020-04-09 00:19:31 -070020306 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020307 TEST_REQUIRES_X86_FMA3;
20308 for (uint32_t n = 9; n < 16; n++) {
20309 for (size_t k = 1; k <= 5; k += 2) {
20310 for (uint32_t m = 1; m <= 8; m++) {
20311 GemmMicrokernelTester()
20312 .mr(8)
20313 .nr(8)
20314 .kr(1)
20315 .sr(1)
20316 .m(m)
20317 .n(n)
20318 .k(k)
20319 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020320 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020321 }
20322 }
20323 }
20324 }
20325
Marat Dukhande06f492020-04-09 00:19:31 -070020326 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_div_8) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020327 TEST_REQUIRES_X86_FMA3;
20328 for (uint32_t n = 16; n <= 24; n += 8) {
20329 for (size_t k = 1; k <= 5; k += 2) {
20330 GemmMicrokernelTester()
20331 .mr(8)
20332 .nr(8)
20333 .kr(1)
20334 .sr(1)
20335 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020336 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020337 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020338 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020339 }
20340 }
20341 }
20342
Marat Dukhande06f492020-04-09 00:19:31 -070020343 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_div_8_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020344 TEST_REQUIRES_X86_FMA3;
20345 for (uint32_t n = 16; n <= 24; n += 8) {
20346 for (size_t k = 1; k <= 5; k += 2) {
20347 GemmMicrokernelTester()
20348 .mr(8)
20349 .nr(8)
20350 .kr(1)
20351 .sr(1)
20352 .m(8)
20353 .n(n)
20354 .k(k)
20355 .cn_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020356 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020357 }
20358 }
20359 }
20360
Marat Dukhande06f492020-04-09 00:19:31 -070020361 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_div_8_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020362 TEST_REQUIRES_X86_FMA3;
20363 for (uint32_t n = 16; n <= 24; n += 8) {
20364 for (size_t k = 1; k <= 5; k += 2) {
20365 for (uint32_t m = 1; m <= 8; m++) {
20366 GemmMicrokernelTester()
20367 .mr(8)
20368 .nr(8)
20369 .kr(1)
20370 .sr(1)
20371 .m(m)
20372 .n(n)
20373 .k(k)
20374 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020375 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020376 }
20377 }
20378 }
20379 }
20380
Marat Dukhande06f492020-04-09 00:19:31 -070020381 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020382 TEST_REQUIRES_X86_FMA3;
20383 for (size_t k = 1; k <= 5; k += 2) {
20384 GemmMicrokernelTester()
20385 .mr(8)
20386 .nr(8)
20387 .kr(1)
20388 .sr(1)
20389 .m(8)
20390 .n(8)
20391 .k(k)
20392 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020393 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020394 }
20395 }
20396
Marat Dukhande06f492020-04-09 00:19:31 -070020397 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020398 TEST_REQUIRES_X86_FMA3;
20399 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020400 for (uint32_t n = 1; n <= 8; n++) {
20401 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020402 GemmMicrokernelTester()
20403 .mr(8)
20404 .nr(8)
20405 .kr(1)
20406 .sr(1)
20407 .m(m)
20408 .n(n)
20409 .k(k)
20410 .ks(3)
20411 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020412 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020413 }
20414 }
20415 }
20416 }
20417
Marat Dukhande06f492020-04-09 00:19:31 -070020418 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020419 TEST_REQUIRES_X86_FMA3;
20420 for (uint32_t n = 9; n < 16; n++) {
20421 for (size_t k = 1; k <= 5; k += 2) {
20422 GemmMicrokernelTester()
20423 .mr(8)
20424 .nr(8)
20425 .kr(1)
20426 .sr(1)
20427 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020428 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020429 .k(k)
20430 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020431 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020432 }
20433 }
20434 }
20435
Marat Dukhande06f492020-04-09 00:19:31 -070020436 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, n_div_8_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020437 TEST_REQUIRES_X86_FMA3;
20438 for (uint32_t n = 16; n <= 24; n += 8) {
20439 for (size_t k = 1; k <= 5; k += 2) {
20440 GemmMicrokernelTester()
20441 .mr(8)
20442 .nr(8)
20443 .kr(1)
20444 .sr(1)
20445 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020446 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020447 .k(k)
20448 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020449 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020450 }
20451 }
20452 }
20453
Marat Dukhande06f492020-04-09 00:19:31 -070020454 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020455 TEST_REQUIRES_X86_FMA3;
20456 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020457 for (uint32_t n = 1; n <= 8; n++) {
20458 for (uint32_t m = 1; m <= 8; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020459 GemmMicrokernelTester()
20460 .mr(8)
20461 .nr(8)
20462 .kr(1)
20463 .sr(1)
20464 .m(m)
20465 .n(n)
20466 .k(k)
20467 .cm_stride(11)
20468 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020469 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020470 }
20471 }
20472 }
20473 }
20474
Marat Dukhande06f492020-04-09 00:19:31 -070020475 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020476 TEST_REQUIRES_X86_FMA3;
20477 for (size_t k = 1; k <= 5; k += 2) {
20478 GemmMicrokernelTester()
20479 .mr(8)
20480 .nr(8)
20481 .kr(1)
20482 .sr(1)
20483 .m(8)
20484 .n(8)
20485 .k(k)
20486 .ks(3)
20487 .a_offset(43)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020488 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020489 }
20490 }
20491
Marat Dukhande06f492020-04-09 00:19:31 -070020492 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020493 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020494 for (size_t k = 1; k <= 5; k += 2) {
20495 for (uint32_t mz = 0; mz < 8; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020496 GemmMicrokernelTester()
20497 .mr(8)
20498 .nr(8)
20499 .kr(1)
20500 .sr(1)
20501 .m(8)
20502 .n(8)
20503 .k(k)
20504 .ks(3)
20505 .a_offset(43)
20506 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020507 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020508 }
20509 }
20510 }
20511
Marat Dukhande06f492020-04-09 00:19:31 -070020512 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020513 TEST_REQUIRES_X86_FMA3;
20514 GemmMicrokernelTester()
20515 .mr(8)
20516 .nr(8)
20517 .kr(1)
20518 .sr(1)
20519 .m(8)
20520 .n(8)
20521 .k(1)
20522 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020523 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020524 }
20525
Marat Dukhande06f492020-04-09 00:19:31 -070020526 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020527 TEST_REQUIRES_X86_FMA3;
20528 GemmMicrokernelTester()
20529 .mr(8)
20530 .nr(8)
20531 .kr(1)
20532 .sr(1)
20533 .m(8)
20534 .n(8)
20535 .k(1)
20536 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020537 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020538 }
20539
Marat Dukhande06f492020-04-09 00:19:31 -070020540 TEST(F32_IGEMM_MINMAX_8X8__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020541 TEST_REQUIRES_X86_FMA3;
20542 GemmMicrokernelTester()
20543 .mr(8)
20544 .nr(8)
20545 .kr(1)
20546 .sr(1)
20547 .m(8)
20548 .n(8)
20549 .k(1)
20550 .cm_stride(11)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020551 .Test(xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020552 }
20553#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20554
20555
20556#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070020557 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020558 TEST_REQUIRES_X86_FMA3;
20559 GemmMicrokernelTester()
20560 .mr(4)
20561 .nr(16)
20562 .kr(1)
20563 .sr(1)
20564 .m(4)
20565 .n(16)
20566 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020567 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020568 }
20569
Marat Dukhande06f492020-04-09 00:19:31 -070020570 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020571 TEST_REQUIRES_X86_FMA3;
20572 GemmMicrokernelTester()
20573 .mr(4)
20574 .nr(16)
20575 .kr(1)
20576 .sr(1)
20577 .m(4)
20578 .n(16)
20579 .k(1)
20580 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020581 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020582 }
20583
Marat Dukhande06f492020-04-09 00:19:31 -070020584 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020585 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020586 for (uint32_t n = 1; n <= 16; n++) {
20587 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020588 GemmMicrokernelTester()
20589 .mr(4)
20590 .nr(16)
20591 .kr(1)
20592 .sr(1)
20593 .m(m)
20594 .n(n)
20595 .k(1)
20596 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020597 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020598 }
20599 }
20600 }
20601
Marat Dukhande06f492020-04-09 00:19:31 -070020602 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020603 TEST_REQUIRES_X86_FMA3;
20604 for (uint32_t m = 1; m <= 4; m++) {
20605 GemmMicrokernelTester()
20606 .mr(4)
20607 .nr(16)
20608 .kr(1)
20609 .sr(1)
20610 .m(m)
20611 .n(16)
20612 .k(1)
20613 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020614 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020615 }
20616 }
20617
Marat Dukhande06f492020-04-09 00:19:31 -070020618 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020619 TEST_REQUIRES_X86_FMA3;
20620 for (uint32_t n = 1; n <= 16; n++) {
20621 GemmMicrokernelTester()
20622 .mr(4)
20623 .nr(16)
20624 .kr(1)
20625 .sr(1)
20626 .m(4)
20627 .n(n)
20628 .k(1)
20629 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020630 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020631 }
20632 }
20633
Marat Dukhande06f492020-04-09 00:19:31 -070020634 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020635 TEST_REQUIRES_X86_FMA3;
20636 for (size_t k = 2; k < 10; k++) {
20637 GemmMicrokernelTester()
20638 .mr(4)
20639 .nr(16)
20640 .kr(1)
20641 .sr(1)
20642 .m(4)
20643 .n(16)
20644 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020645 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020646 }
20647 }
20648
Marat Dukhande06f492020-04-09 00:19:31 -070020649 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020650 TEST_REQUIRES_X86_FMA3;
20651 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020652 for (uint32_t n = 1; n <= 16; n++) {
20653 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020654 GemmMicrokernelTester()
20655 .mr(4)
20656 .nr(16)
20657 .kr(1)
20658 .sr(1)
20659 .m(m)
20660 .n(n)
20661 .k(k)
20662 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020663 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020664 }
20665 }
20666 }
20667 }
20668
Marat Dukhande06f492020-04-09 00:19:31 -070020669 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020670 TEST_REQUIRES_X86_FMA3;
20671 for (uint32_t n = 17; n < 32; n++) {
20672 for (size_t k = 1; k <= 5; k += 2) {
20673 GemmMicrokernelTester()
20674 .mr(4)
20675 .nr(16)
20676 .kr(1)
20677 .sr(1)
20678 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020679 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020680 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020681 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020682 }
20683 }
20684 }
20685
Marat Dukhande06f492020-04-09 00:19:31 -070020686 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020687 TEST_REQUIRES_X86_FMA3;
20688 for (uint32_t n = 17; n < 32; n++) {
20689 for (size_t k = 1; k <= 5; k += 2) {
20690 GemmMicrokernelTester()
20691 .mr(4)
20692 .nr(16)
20693 .kr(1)
20694 .sr(1)
20695 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020696 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020697 .k(k)
20698 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020699 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020700 }
20701 }
20702 }
20703
Marat Dukhande06f492020-04-09 00:19:31 -070020704 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020705 TEST_REQUIRES_X86_FMA3;
20706 for (uint32_t n = 17; n < 32; n++) {
20707 for (size_t k = 1; k <= 5; k += 2) {
20708 for (uint32_t m = 1; m <= 4; m++) {
20709 GemmMicrokernelTester()
20710 .mr(4)
20711 .nr(16)
20712 .kr(1)
20713 .sr(1)
20714 .m(m)
20715 .n(n)
20716 .k(k)
20717 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020718 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020719 }
20720 }
20721 }
20722 }
20723
Marat Dukhande06f492020-04-09 00:19:31 -070020724 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020725 TEST_REQUIRES_X86_FMA3;
20726 for (uint32_t n = 32; n <= 48; n += 16) {
20727 for (size_t k = 1; k <= 5; k += 2) {
20728 GemmMicrokernelTester()
20729 .mr(4)
20730 .nr(16)
20731 .kr(1)
20732 .sr(1)
20733 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020734 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020735 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020736 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020737 }
20738 }
20739 }
20740
Marat Dukhande06f492020-04-09 00:19:31 -070020741 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020742 TEST_REQUIRES_X86_FMA3;
20743 for (uint32_t n = 32; n <= 48; n += 16) {
20744 for (size_t k = 1; k <= 5; k += 2) {
20745 GemmMicrokernelTester()
20746 .mr(4)
20747 .nr(16)
20748 .kr(1)
20749 .sr(1)
20750 .m(4)
20751 .n(n)
20752 .k(k)
20753 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020754 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020755 }
20756 }
20757 }
20758
Marat Dukhande06f492020-04-09 00:19:31 -070020759 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020760 TEST_REQUIRES_X86_FMA3;
20761 for (uint32_t n = 32; n <= 48; n += 16) {
20762 for (size_t k = 1; k <= 5; k += 2) {
20763 for (uint32_t m = 1; m <= 4; m++) {
20764 GemmMicrokernelTester()
20765 .mr(4)
20766 .nr(16)
20767 .kr(1)
20768 .sr(1)
20769 .m(m)
20770 .n(n)
20771 .k(k)
20772 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020773 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020774 }
20775 }
20776 }
20777 }
20778
Marat Dukhande06f492020-04-09 00:19:31 -070020779 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020780 TEST_REQUIRES_X86_FMA3;
20781 for (size_t k = 1; k <= 5; k += 2) {
20782 GemmMicrokernelTester()
20783 .mr(4)
20784 .nr(16)
20785 .kr(1)
20786 .sr(1)
20787 .m(4)
20788 .n(16)
20789 .k(k)
20790 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020791 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020792 }
20793 }
20794
Marat Dukhande06f492020-04-09 00:19:31 -070020795 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020796 TEST_REQUIRES_X86_FMA3;
20797 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020798 for (uint32_t n = 1; n <= 16; n++) {
20799 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020800 GemmMicrokernelTester()
20801 .mr(4)
20802 .nr(16)
20803 .kr(1)
20804 .sr(1)
20805 .m(m)
20806 .n(n)
20807 .k(k)
20808 .ks(3)
20809 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020810 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020811 }
20812 }
20813 }
20814 }
20815
Marat Dukhande06f492020-04-09 00:19:31 -070020816 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_gt_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020817 TEST_REQUIRES_X86_FMA3;
20818 for (uint32_t n = 17; n < 32; n++) {
20819 for (size_t k = 1; k <= 5; k += 2) {
20820 GemmMicrokernelTester()
20821 .mr(4)
20822 .nr(16)
20823 .kr(1)
20824 .sr(1)
20825 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020826 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020827 .k(k)
20828 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020829 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020830 }
20831 }
20832 }
20833
Marat Dukhande06f492020-04-09 00:19:31 -070020834 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, n_div_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020835 TEST_REQUIRES_X86_FMA3;
20836 for (uint32_t n = 32; n <= 48; n += 16) {
20837 for (size_t k = 1; k <= 5; k += 2) {
20838 GemmMicrokernelTester()
20839 .mr(4)
20840 .nr(16)
20841 .kr(1)
20842 .sr(1)
20843 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020844 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070020845 .k(k)
20846 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020847 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020848 }
20849 }
20850 }
20851
Marat Dukhande06f492020-04-09 00:19:31 -070020852 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020853 TEST_REQUIRES_X86_FMA3;
20854 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020855 for (uint32_t n = 1; n <= 16; n++) {
20856 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020857 GemmMicrokernelTester()
20858 .mr(4)
20859 .nr(16)
20860 .kr(1)
20861 .sr(1)
20862 .m(m)
20863 .n(n)
20864 .k(k)
20865 .cm_stride(19)
20866 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020867 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020868 }
20869 }
20870 }
20871 }
20872
Marat Dukhande06f492020-04-09 00:19:31 -070020873 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020874 TEST_REQUIRES_X86_FMA3;
20875 for (size_t k = 1; k <= 5; k += 2) {
20876 GemmMicrokernelTester()
20877 .mr(4)
20878 .nr(16)
20879 .kr(1)
20880 .sr(1)
20881 .m(4)
20882 .n(16)
20883 .k(k)
20884 .ks(3)
20885 .a_offset(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020886 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020887 }
20888 }
20889
Marat Dukhande06f492020-04-09 00:19:31 -070020890 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020891 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020892 for (size_t k = 1; k <= 5; k += 2) {
20893 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020894 GemmMicrokernelTester()
20895 .mr(4)
20896 .nr(16)
20897 .kr(1)
20898 .sr(1)
20899 .m(4)
20900 .n(16)
20901 .k(k)
20902 .ks(3)
20903 .a_offset(23)
20904 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020905 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020906 }
20907 }
20908 }
20909
Marat Dukhande06f492020-04-09 00:19:31 -070020910 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020911 TEST_REQUIRES_X86_FMA3;
20912 GemmMicrokernelTester()
20913 .mr(4)
20914 .nr(16)
20915 .kr(1)
20916 .sr(1)
20917 .m(4)
20918 .n(16)
20919 .k(1)
20920 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020921 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020922 }
20923
Marat Dukhande06f492020-04-09 00:19:31 -070020924 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020925 TEST_REQUIRES_X86_FMA3;
20926 GemmMicrokernelTester()
20927 .mr(4)
20928 .nr(16)
20929 .kr(1)
20930 .sr(1)
20931 .m(4)
20932 .n(16)
20933 .k(1)
20934 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020935 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020936 }
20937
Marat Dukhande06f492020-04-09 00:19:31 -070020938 TEST(F32_IGEMM_MINMAX_4X16__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020939 TEST_REQUIRES_X86_FMA3;
20940 GemmMicrokernelTester()
20941 .mr(4)
20942 .nr(16)
20943 .kr(1)
20944 .sr(1)
20945 .m(4)
20946 .n(16)
20947 .k(1)
20948 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020949 .Test(xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020950 }
20951#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20952
20953
20954#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070020955 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020956 TEST_REQUIRES_X86_FMA3;
20957 GemmMicrokernelTester()
20958 .mr(5)
20959 .nr(16)
20960 .kr(1)
20961 .sr(1)
20962 .m(5)
20963 .n(16)
20964 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020965 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020966 }
20967
Marat Dukhande06f492020-04-09 00:19:31 -070020968 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020969 TEST_REQUIRES_X86_FMA3;
20970 GemmMicrokernelTester()
20971 .mr(5)
20972 .nr(16)
20973 .kr(1)
20974 .sr(1)
20975 .m(5)
20976 .n(16)
20977 .k(1)
20978 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020979 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020980 }
20981
Marat Dukhande06f492020-04-09 00:19:31 -070020982 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020983 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020984 for (uint32_t n = 1; n <= 16; n++) {
20985 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070020986 GemmMicrokernelTester()
20987 .mr(5)
20988 .nr(16)
20989 .kr(1)
20990 .sr(1)
20991 .m(m)
20992 .n(n)
20993 .k(1)
20994 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070020995 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070020996 }
20997 }
20998 }
20999
Marat Dukhande06f492020-04-09 00:19:31 -070021000 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021001 TEST_REQUIRES_X86_FMA3;
21002 for (uint32_t m = 1; m <= 5; m++) {
21003 GemmMicrokernelTester()
21004 .mr(5)
21005 .nr(16)
21006 .kr(1)
21007 .sr(1)
21008 .m(m)
21009 .n(16)
21010 .k(1)
21011 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021012 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021013 }
21014 }
21015
Marat Dukhande06f492020-04-09 00:19:31 -070021016 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021017 TEST_REQUIRES_X86_FMA3;
21018 for (uint32_t n = 1; n <= 16; n++) {
21019 GemmMicrokernelTester()
21020 .mr(5)
21021 .nr(16)
21022 .kr(1)
21023 .sr(1)
21024 .m(5)
21025 .n(n)
21026 .k(1)
21027 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021028 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021029 }
21030 }
21031
Marat Dukhande06f492020-04-09 00:19:31 -070021032 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021033 TEST_REQUIRES_X86_FMA3;
21034 for (size_t k = 2; k < 10; k++) {
21035 GemmMicrokernelTester()
21036 .mr(5)
21037 .nr(16)
21038 .kr(1)
21039 .sr(1)
21040 .m(5)
21041 .n(16)
21042 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021043 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021044 }
21045 }
21046
Marat Dukhande06f492020-04-09 00:19:31 -070021047 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021048 TEST_REQUIRES_X86_FMA3;
21049 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021050 for (uint32_t n = 1; n <= 16; n++) {
21051 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021052 GemmMicrokernelTester()
21053 .mr(5)
21054 .nr(16)
21055 .kr(1)
21056 .sr(1)
21057 .m(m)
21058 .n(n)
21059 .k(k)
21060 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021061 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021062 }
21063 }
21064 }
21065 }
21066
Marat Dukhande06f492020-04-09 00:19:31 -070021067 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021068 TEST_REQUIRES_X86_FMA3;
21069 for (uint32_t n = 17; n < 32; n++) {
21070 for (size_t k = 1; k <= 5; k += 2) {
21071 GemmMicrokernelTester()
21072 .mr(5)
21073 .nr(16)
21074 .kr(1)
21075 .sr(1)
21076 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021077 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021078 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021079 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021080 }
21081 }
21082 }
21083
Marat Dukhande06f492020-04-09 00:19:31 -070021084 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021085 TEST_REQUIRES_X86_FMA3;
21086 for (uint32_t n = 17; n < 32; n++) {
21087 for (size_t k = 1; k <= 5; k += 2) {
21088 GemmMicrokernelTester()
21089 .mr(5)
21090 .nr(16)
21091 .kr(1)
21092 .sr(1)
21093 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021094 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021095 .k(k)
21096 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021097 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021098 }
21099 }
21100 }
21101
Marat Dukhande06f492020-04-09 00:19:31 -070021102 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021103 TEST_REQUIRES_X86_FMA3;
21104 for (uint32_t n = 17; n < 32; n++) {
21105 for (size_t k = 1; k <= 5; k += 2) {
21106 for (uint32_t m = 1; m <= 5; m++) {
21107 GemmMicrokernelTester()
21108 .mr(5)
21109 .nr(16)
21110 .kr(1)
21111 .sr(1)
21112 .m(m)
21113 .n(n)
21114 .k(k)
21115 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021116 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021117 }
21118 }
21119 }
21120 }
21121
Marat Dukhande06f492020-04-09 00:19:31 -070021122 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021123 TEST_REQUIRES_X86_FMA3;
21124 for (uint32_t n = 32; n <= 48; n += 16) {
21125 for (size_t k = 1; k <= 5; k += 2) {
21126 GemmMicrokernelTester()
21127 .mr(5)
21128 .nr(16)
21129 .kr(1)
21130 .sr(1)
21131 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021132 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021133 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021134 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021135 }
21136 }
21137 }
21138
Marat Dukhande06f492020-04-09 00:19:31 -070021139 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021140 TEST_REQUIRES_X86_FMA3;
21141 for (uint32_t n = 32; n <= 48; n += 16) {
21142 for (size_t k = 1; k <= 5; k += 2) {
21143 GemmMicrokernelTester()
21144 .mr(5)
21145 .nr(16)
21146 .kr(1)
21147 .sr(1)
21148 .m(5)
21149 .n(n)
21150 .k(k)
21151 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021152 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021153 }
21154 }
21155 }
21156
Marat Dukhande06f492020-04-09 00:19:31 -070021157 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021158 TEST_REQUIRES_X86_FMA3;
21159 for (uint32_t n = 32; n <= 48; n += 16) {
21160 for (size_t k = 1; k <= 5; k += 2) {
21161 for (uint32_t m = 1; m <= 5; m++) {
21162 GemmMicrokernelTester()
21163 .mr(5)
21164 .nr(16)
21165 .kr(1)
21166 .sr(1)
21167 .m(m)
21168 .n(n)
21169 .k(k)
21170 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021171 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021172 }
21173 }
21174 }
21175 }
21176
Marat Dukhande06f492020-04-09 00:19:31 -070021177 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021178 TEST_REQUIRES_X86_FMA3;
21179 for (size_t k = 1; k <= 5; k += 2) {
21180 GemmMicrokernelTester()
21181 .mr(5)
21182 .nr(16)
21183 .kr(1)
21184 .sr(1)
21185 .m(5)
21186 .n(16)
21187 .k(k)
21188 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021189 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021190 }
21191 }
21192
Marat Dukhande06f492020-04-09 00:19:31 -070021193 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021194 TEST_REQUIRES_X86_FMA3;
21195 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021196 for (uint32_t n = 1; n <= 16; n++) {
21197 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021198 GemmMicrokernelTester()
21199 .mr(5)
21200 .nr(16)
21201 .kr(1)
21202 .sr(1)
21203 .m(m)
21204 .n(n)
21205 .k(k)
21206 .ks(3)
21207 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021208 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021209 }
21210 }
21211 }
21212 }
21213
Marat Dukhande06f492020-04-09 00:19:31 -070021214 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_gt_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021215 TEST_REQUIRES_X86_FMA3;
21216 for (uint32_t n = 17; n < 32; n++) {
21217 for (size_t k = 1; k <= 5; k += 2) {
21218 GemmMicrokernelTester()
21219 .mr(5)
21220 .nr(16)
21221 .kr(1)
21222 .sr(1)
21223 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021224 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021225 .k(k)
21226 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021227 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021228 }
21229 }
21230 }
21231
Marat Dukhande06f492020-04-09 00:19:31 -070021232 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, n_div_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021233 TEST_REQUIRES_X86_FMA3;
21234 for (uint32_t n = 32; n <= 48; n += 16) {
21235 for (size_t k = 1; k <= 5; k += 2) {
21236 GemmMicrokernelTester()
21237 .mr(5)
21238 .nr(16)
21239 .kr(1)
21240 .sr(1)
21241 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021242 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021243 .k(k)
21244 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021245 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021246 }
21247 }
21248 }
21249
Marat Dukhande06f492020-04-09 00:19:31 -070021250 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021251 TEST_REQUIRES_X86_FMA3;
21252 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021253 for (uint32_t n = 1; n <= 16; n++) {
21254 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021255 GemmMicrokernelTester()
21256 .mr(5)
21257 .nr(16)
21258 .kr(1)
21259 .sr(1)
21260 .m(m)
21261 .n(n)
21262 .k(k)
21263 .cm_stride(19)
21264 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021265 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021266 }
21267 }
21268 }
21269 }
21270
Marat Dukhande06f492020-04-09 00:19:31 -070021271 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021272 TEST_REQUIRES_X86_FMA3;
21273 for (size_t k = 1; k <= 5; k += 2) {
21274 GemmMicrokernelTester()
21275 .mr(5)
21276 .nr(16)
21277 .kr(1)
21278 .sr(1)
21279 .m(5)
21280 .n(16)
21281 .k(k)
21282 .ks(3)
21283 .a_offset(29)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021284 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021285 }
21286 }
21287
Marat Dukhande06f492020-04-09 00:19:31 -070021288 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021289 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021290 for (size_t k = 1; k <= 5; k += 2) {
21291 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021292 GemmMicrokernelTester()
21293 .mr(5)
21294 .nr(16)
21295 .kr(1)
21296 .sr(1)
21297 .m(5)
21298 .n(16)
21299 .k(k)
21300 .ks(3)
21301 .a_offset(29)
21302 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021303 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021304 }
21305 }
21306 }
21307
Marat Dukhande06f492020-04-09 00:19:31 -070021308 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021309 TEST_REQUIRES_X86_FMA3;
21310 GemmMicrokernelTester()
21311 .mr(5)
21312 .nr(16)
21313 .kr(1)
21314 .sr(1)
21315 .m(5)
21316 .n(16)
21317 .k(1)
21318 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021319 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021320 }
21321
Marat Dukhande06f492020-04-09 00:19:31 -070021322 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021323 TEST_REQUIRES_X86_FMA3;
21324 GemmMicrokernelTester()
21325 .mr(5)
21326 .nr(16)
21327 .kr(1)
21328 .sr(1)
21329 .m(5)
21330 .n(16)
21331 .k(1)
21332 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021333 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021334 }
21335
Marat Dukhande06f492020-04-09 00:19:31 -070021336 TEST(F32_IGEMM_MINMAX_5X16__FMA3_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021337 TEST_REQUIRES_X86_FMA3;
21338 GemmMicrokernelTester()
21339 .mr(5)
21340 .nr(16)
21341 .kr(1)
21342 .sr(1)
21343 .m(5)
21344 .n(16)
21345 .k(1)
21346 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021347 .Test(xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021348 }
21349#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21350
21351
21352#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070021353 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021354 TEST_REQUIRES_X86_AVX512F;
21355 GemmMicrokernelTester()
21356 .mr(4)
21357 .nr(16)
21358 .kr(1)
21359 .sr(1)
21360 .m(4)
21361 .n(16)
21362 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021363 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021364 }
21365
Marat Dukhande06f492020-04-09 00:19:31 -070021366 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021367 TEST_REQUIRES_X86_AVX512F;
21368 GemmMicrokernelTester()
21369 .mr(4)
21370 .nr(16)
21371 .kr(1)
21372 .sr(1)
21373 .m(4)
21374 .n(16)
21375 .k(1)
21376 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021377 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021378 }
21379
Marat Dukhande06f492020-04-09 00:19:31 -070021380 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021381 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021382 for (uint32_t n = 1; n <= 16; n++) {
21383 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021384 GemmMicrokernelTester()
21385 .mr(4)
21386 .nr(16)
21387 .kr(1)
21388 .sr(1)
21389 .m(m)
21390 .n(n)
21391 .k(1)
21392 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021393 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021394 }
21395 }
21396 }
21397
Marat Dukhande06f492020-04-09 00:19:31 -070021398 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021399 TEST_REQUIRES_X86_AVX512F;
21400 for (uint32_t m = 1; m <= 4; m++) {
21401 GemmMicrokernelTester()
21402 .mr(4)
21403 .nr(16)
21404 .kr(1)
21405 .sr(1)
21406 .m(m)
21407 .n(16)
21408 .k(1)
21409 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021410 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021411 }
21412 }
21413
Marat Dukhande06f492020-04-09 00:19:31 -070021414 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021415 TEST_REQUIRES_X86_AVX512F;
21416 for (uint32_t n = 1; n <= 16; n++) {
21417 GemmMicrokernelTester()
21418 .mr(4)
21419 .nr(16)
21420 .kr(1)
21421 .sr(1)
21422 .m(4)
21423 .n(n)
21424 .k(1)
21425 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021426 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021427 }
21428 }
21429
Marat Dukhande06f492020-04-09 00:19:31 -070021430 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021431 TEST_REQUIRES_X86_AVX512F;
21432 for (size_t k = 2; k < 10; k++) {
21433 GemmMicrokernelTester()
21434 .mr(4)
21435 .nr(16)
21436 .kr(1)
21437 .sr(1)
21438 .m(4)
21439 .n(16)
21440 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021441 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021442 }
21443 }
21444
Marat Dukhande06f492020-04-09 00:19:31 -070021445 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021446 TEST_REQUIRES_X86_AVX512F;
21447 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021448 for (uint32_t n = 1; n <= 16; n++) {
21449 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021450 GemmMicrokernelTester()
21451 .mr(4)
21452 .nr(16)
21453 .kr(1)
21454 .sr(1)
21455 .m(m)
21456 .n(n)
21457 .k(k)
21458 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021459 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021460 }
21461 }
21462 }
21463 }
21464
Marat Dukhande06f492020-04-09 00:19:31 -070021465 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021466 TEST_REQUIRES_X86_AVX512F;
21467 for (uint32_t n = 17; n < 32; n++) {
21468 for (size_t k = 1; k <= 5; k += 2) {
21469 GemmMicrokernelTester()
21470 .mr(4)
21471 .nr(16)
21472 .kr(1)
21473 .sr(1)
21474 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021475 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021476 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021477 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021478 }
21479 }
21480 }
21481
Marat Dukhande06f492020-04-09 00:19:31 -070021482 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021483 TEST_REQUIRES_X86_AVX512F;
21484 for (uint32_t n = 17; n < 32; n++) {
21485 for (size_t k = 1; k <= 5; k += 2) {
21486 GemmMicrokernelTester()
21487 .mr(4)
21488 .nr(16)
21489 .kr(1)
21490 .sr(1)
21491 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021492 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021493 .k(k)
21494 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021495 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021496 }
21497 }
21498 }
21499
Marat Dukhande06f492020-04-09 00:19:31 -070021500 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021501 TEST_REQUIRES_X86_AVX512F;
21502 for (uint32_t n = 17; n < 32; n++) {
21503 for (size_t k = 1; k <= 5; k += 2) {
21504 for (uint32_t m = 1; m <= 4; m++) {
21505 GemmMicrokernelTester()
21506 .mr(4)
21507 .nr(16)
21508 .kr(1)
21509 .sr(1)
21510 .m(m)
21511 .n(n)
21512 .k(k)
21513 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021514 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021515 }
21516 }
21517 }
21518 }
21519
Marat Dukhande06f492020-04-09 00:19:31 -070021520 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021521 TEST_REQUIRES_X86_AVX512F;
21522 for (uint32_t n = 32; n <= 48; n += 16) {
21523 for (size_t k = 1; k <= 5; k += 2) {
21524 GemmMicrokernelTester()
21525 .mr(4)
21526 .nr(16)
21527 .kr(1)
21528 .sr(1)
21529 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021530 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021531 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021532 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021533 }
21534 }
21535 }
21536
Marat Dukhande06f492020-04-09 00:19:31 -070021537 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021538 TEST_REQUIRES_X86_AVX512F;
21539 for (uint32_t n = 32; n <= 48; n += 16) {
21540 for (size_t k = 1; k <= 5; k += 2) {
21541 GemmMicrokernelTester()
21542 .mr(4)
21543 .nr(16)
21544 .kr(1)
21545 .sr(1)
21546 .m(4)
21547 .n(n)
21548 .k(k)
21549 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021550 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021551 }
21552 }
21553 }
21554
Marat Dukhande06f492020-04-09 00:19:31 -070021555 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021556 TEST_REQUIRES_X86_AVX512F;
21557 for (uint32_t n = 32; n <= 48; n += 16) {
21558 for (size_t k = 1; k <= 5; k += 2) {
21559 for (uint32_t m = 1; m <= 4; m++) {
21560 GemmMicrokernelTester()
21561 .mr(4)
21562 .nr(16)
21563 .kr(1)
21564 .sr(1)
21565 .m(m)
21566 .n(n)
21567 .k(k)
21568 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021569 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021570 }
21571 }
21572 }
21573 }
21574
Marat Dukhande06f492020-04-09 00:19:31 -070021575 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021576 TEST_REQUIRES_X86_AVX512F;
21577 for (size_t k = 1; k <= 5; k += 2) {
21578 GemmMicrokernelTester()
21579 .mr(4)
21580 .nr(16)
21581 .kr(1)
21582 .sr(1)
21583 .m(4)
21584 .n(16)
21585 .k(k)
21586 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021587 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021588 }
21589 }
21590
Marat Dukhande06f492020-04-09 00:19:31 -070021591 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021592 TEST_REQUIRES_X86_AVX512F;
21593 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021594 for (uint32_t n = 1; n <= 16; n++) {
21595 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021596 GemmMicrokernelTester()
21597 .mr(4)
21598 .nr(16)
21599 .kr(1)
21600 .sr(1)
21601 .m(m)
21602 .n(n)
21603 .k(k)
21604 .ks(3)
21605 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021606 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021607 }
21608 }
21609 }
21610 }
21611
Marat Dukhande06f492020-04-09 00:19:31 -070021612 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021613 TEST_REQUIRES_X86_AVX512F;
21614 for (uint32_t n = 17; n < 32; n++) {
21615 for (size_t k = 1; k <= 5; k += 2) {
21616 GemmMicrokernelTester()
21617 .mr(4)
21618 .nr(16)
21619 .kr(1)
21620 .sr(1)
21621 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021622 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021623 .k(k)
21624 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021625 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021626 }
21627 }
21628 }
21629
Marat Dukhande06f492020-04-09 00:19:31 -070021630 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021631 TEST_REQUIRES_X86_AVX512F;
21632 for (uint32_t n = 32; n <= 48; n += 16) {
21633 for (size_t k = 1; k <= 5; k += 2) {
21634 GemmMicrokernelTester()
21635 .mr(4)
21636 .nr(16)
21637 .kr(1)
21638 .sr(1)
21639 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021640 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021641 .k(k)
21642 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021643 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021644 }
21645 }
21646 }
21647
Marat Dukhande06f492020-04-09 00:19:31 -070021648 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021649 TEST_REQUIRES_X86_AVX512F;
21650 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021651 for (uint32_t n = 1; n <= 16; n++) {
21652 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021653 GemmMicrokernelTester()
21654 .mr(4)
21655 .nr(16)
21656 .kr(1)
21657 .sr(1)
21658 .m(m)
21659 .n(n)
21660 .k(k)
21661 .cm_stride(19)
21662 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021663 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021664 }
21665 }
21666 }
21667 }
21668
Marat Dukhande06f492020-04-09 00:19:31 -070021669 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021670 TEST_REQUIRES_X86_AVX512F;
21671 for (size_t k = 1; k <= 5; k += 2) {
21672 GemmMicrokernelTester()
21673 .mr(4)
21674 .nr(16)
21675 .kr(1)
21676 .sr(1)
21677 .m(4)
21678 .n(16)
21679 .k(k)
21680 .ks(3)
21681 .a_offset(23)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021682 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021683 }
21684 }
21685
Marat Dukhande06f492020-04-09 00:19:31 -070021686 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021687 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021688 for (size_t k = 1; k <= 5; k += 2) {
21689 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021690 GemmMicrokernelTester()
21691 .mr(4)
21692 .nr(16)
21693 .kr(1)
21694 .sr(1)
21695 .m(4)
21696 .n(16)
21697 .k(k)
21698 .ks(3)
21699 .a_offset(23)
21700 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021701 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021702 }
21703 }
21704 }
21705
Marat Dukhande06f492020-04-09 00:19:31 -070021706 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021707 TEST_REQUIRES_X86_AVX512F;
21708 GemmMicrokernelTester()
21709 .mr(4)
21710 .nr(16)
21711 .kr(1)
21712 .sr(1)
21713 .m(4)
21714 .n(16)
21715 .k(1)
21716 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021717 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021718 }
21719
Marat Dukhande06f492020-04-09 00:19:31 -070021720 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021721 TEST_REQUIRES_X86_AVX512F;
21722 GemmMicrokernelTester()
21723 .mr(4)
21724 .nr(16)
21725 .kr(1)
21726 .sr(1)
21727 .m(4)
21728 .n(16)
21729 .k(1)
21730 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021731 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021732 }
21733
Marat Dukhande06f492020-04-09 00:19:31 -070021734 TEST(F32_IGEMM_MINMAX_4X16__AVX512F_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021735 TEST_REQUIRES_X86_AVX512F;
21736 GemmMicrokernelTester()
21737 .mr(4)
21738 .nr(16)
21739 .kr(1)
21740 .sr(1)
21741 .m(4)
21742 .n(16)
21743 .k(1)
21744 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021745 .Test(xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021746 }
21747#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21748
21749
21750#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhande06f492020-04-09 00:19:31 -070021751 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021752 TEST_REQUIRES_X86_AVX512F;
21753 GemmMicrokernelTester()
21754 .mr(5)
21755 .nr(16)
21756 .kr(1)
21757 .sr(1)
21758 .m(5)
21759 .n(16)
21760 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021761 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021762 }
21763
Marat Dukhande06f492020-04-09 00:19:31 -070021764 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021765 TEST_REQUIRES_X86_AVX512F;
21766 GemmMicrokernelTester()
21767 .mr(5)
21768 .nr(16)
21769 .kr(1)
21770 .sr(1)
21771 .m(5)
21772 .n(16)
21773 .k(1)
21774 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021775 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021776 }
21777
Marat Dukhande06f492020-04-09 00:19:31 -070021778 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021779 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021780 for (uint32_t n = 1; n <= 16; n++) {
21781 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021782 GemmMicrokernelTester()
21783 .mr(5)
21784 .nr(16)
21785 .kr(1)
21786 .sr(1)
21787 .m(m)
21788 .n(n)
21789 .k(1)
21790 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021791 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021792 }
21793 }
21794 }
21795
Marat Dukhande06f492020-04-09 00:19:31 -070021796 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021797 TEST_REQUIRES_X86_AVX512F;
21798 for (uint32_t m = 1; m <= 5; m++) {
21799 GemmMicrokernelTester()
21800 .mr(5)
21801 .nr(16)
21802 .kr(1)
21803 .sr(1)
21804 .m(m)
21805 .n(16)
21806 .k(1)
21807 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021808 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021809 }
21810 }
21811
Marat Dukhande06f492020-04-09 00:19:31 -070021812 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021813 TEST_REQUIRES_X86_AVX512F;
21814 for (uint32_t n = 1; n <= 16; n++) {
21815 GemmMicrokernelTester()
21816 .mr(5)
21817 .nr(16)
21818 .kr(1)
21819 .sr(1)
21820 .m(5)
21821 .n(n)
21822 .k(1)
21823 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021824 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021825 }
21826 }
21827
Marat Dukhande06f492020-04-09 00:19:31 -070021828 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021829 TEST_REQUIRES_X86_AVX512F;
21830 for (size_t k = 2; k < 10; k++) {
21831 GemmMicrokernelTester()
21832 .mr(5)
21833 .nr(16)
21834 .kr(1)
21835 .sr(1)
21836 .m(5)
21837 .n(16)
21838 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021839 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021840 }
21841 }
21842
Marat Dukhande06f492020-04-09 00:19:31 -070021843 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021844 TEST_REQUIRES_X86_AVX512F;
21845 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021846 for (uint32_t n = 1; n <= 16; n++) {
21847 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021848 GemmMicrokernelTester()
21849 .mr(5)
21850 .nr(16)
21851 .kr(1)
21852 .sr(1)
21853 .m(m)
21854 .n(n)
21855 .k(k)
21856 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021857 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021858 }
21859 }
21860 }
21861 }
21862
Marat Dukhande06f492020-04-09 00:19:31 -070021863 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021864 TEST_REQUIRES_X86_AVX512F;
21865 for (uint32_t n = 17; n < 32; n++) {
21866 for (size_t k = 1; k <= 5; k += 2) {
21867 GemmMicrokernelTester()
21868 .mr(5)
21869 .nr(16)
21870 .kr(1)
21871 .sr(1)
21872 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021873 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021874 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021875 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021876 }
21877 }
21878 }
21879
Marat Dukhande06f492020-04-09 00:19:31 -070021880 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021881 TEST_REQUIRES_X86_AVX512F;
21882 for (uint32_t n = 17; n < 32; n++) {
21883 for (size_t k = 1; k <= 5; k += 2) {
21884 GemmMicrokernelTester()
21885 .mr(5)
21886 .nr(16)
21887 .kr(1)
21888 .sr(1)
21889 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021890 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021891 .k(k)
21892 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021893 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021894 }
21895 }
21896 }
21897
Marat Dukhande06f492020-04-09 00:19:31 -070021898 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021899 TEST_REQUIRES_X86_AVX512F;
21900 for (uint32_t n = 17; n < 32; n++) {
21901 for (size_t k = 1; k <= 5; k += 2) {
21902 for (uint32_t m = 1; m <= 5; m++) {
21903 GemmMicrokernelTester()
21904 .mr(5)
21905 .nr(16)
21906 .kr(1)
21907 .sr(1)
21908 .m(m)
21909 .n(n)
21910 .k(k)
21911 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021912 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021913 }
21914 }
21915 }
21916 }
21917
Marat Dukhande06f492020-04-09 00:19:31 -070021918 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_div_16) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021919 TEST_REQUIRES_X86_AVX512F;
21920 for (uint32_t n = 32; n <= 48; n += 16) {
21921 for (size_t k = 1; k <= 5; k += 2) {
21922 GemmMicrokernelTester()
21923 .mr(5)
21924 .nr(16)
21925 .kr(1)
21926 .sr(1)
21927 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021928 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070021929 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021930 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021931 }
21932 }
21933 }
21934
Marat Dukhande06f492020-04-09 00:19:31 -070021935 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021936 TEST_REQUIRES_X86_AVX512F;
21937 for (uint32_t n = 32; n <= 48; n += 16) {
21938 for (size_t k = 1; k <= 5; k += 2) {
21939 GemmMicrokernelTester()
21940 .mr(5)
21941 .nr(16)
21942 .kr(1)
21943 .sr(1)
21944 .m(5)
21945 .n(n)
21946 .k(k)
21947 .cn_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021948 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021949 }
21950 }
21951 }
21952
Marat Dukhande06f492020-04-09 00:19:31 -070021953 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021954 TEST_REQUIRES_X86_AVX512F;
21955 for (uint32_t n = 32; n <= 48; n += 16) {
21956 for (size_t k = 1; k <= 5; k += 2) {
21957 for (uint32_t m = 1; m <= 5; m++) {
21958 GemmMicrokernelTester()
21959 .mr(5)
21960 .nr(16)
21961 .kr(1)
21962 .sr(1)
21963 .m(m)
21964 .n(n)
21965 .k(k)
21966 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021967 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021968 }
21969 }
21970 }
21971 }
21972
Marat Dukhande06f492020-04-09 00:19:31 -070021973 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021974 TEST_REQUIRES_X86_AVX512F;
21975 for (size_t k = 1; k <= 5; k += 2) {
21976 GemmMicrokernelTester()
21977 .mr(5)
21978 .nr(16)
21979 .kr(1)
21980 .sr(1)
21981 .m(5)
21982 .n(16)
21983 .k(k)
21984 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070021985 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070021986 }
21987 }
21988
Marat Dukhande06f492020-04-09 00:19:31 -070021989 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021990 TEST_REQUIRES_X86_AVX512F;
21991 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021992 for (uint32_t n = 1; n <= 16; n++) {
21993 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070021994 GemmMicrokernelTester()
21995 .mr(5)
21996 .nr(16)
21997 .kr(1)
21998 .sr(1)
21999 .m(m)
22000 .n(n)
22001 .k(k)
22002 .ks(3)
22003 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070022004 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070022005 }
22006 }
22007 }
22008 }
22009
Marat Dukhande06f492020-04-09 00:19:31 -070022010 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070022011 TEST_REQUIRES_X86_AVX512F;
22012 for (uint32_t n = 17; n < 32; n++) {
22013 for (size_t k = 1; k <= 5; k += 2) {
22014 GemmMicrokernelTester()
22015 .mr(5)
22016 .nr(16)
22017 .kr(1)
22018 .sr(1)
22019 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022020 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070022021 .k(k)
22022 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070022023 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070022024 }
22025 }
22026 }
22027
Marat Dukhande06f492020-04-09 00:19:31 -070022028 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070022029 TEST_REQUIRES_X86_AVX512F;
22030 for (uint32_t n = 32; n <= 48; n += 16) {
22031 for (size_t k = 1; k <= 5; k += 2) {
22032 GemmMicrokernelTester()
22033 .mr(5)
22034 .nr(16)
22035 .kr(1)
22036 .sr(1)
22037 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022038 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070022039 .k(k)
22040 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070022041 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070022042 }
22043 }
22044 }
22045
Marat Dukhande06f492020-04-09 00:19:31 -070022046 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070022047 TEST_REQUIRES_X86_AVX512F;
22048 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022049 for (uint32_t n = 1; n <= 16; n++) {
22050 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070022051 GemmMicrokernelTester()
22052 .mr(5)
22053 .nr(16)
22054 .kr(1)
22055 .sr(1)
22056 .m(m)
22057 .n(n)
22058 .k(k)
22059 .cm_stride(19)
22060 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070022061 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070022062 }
22063 }
22064 }
22065 }
22066
Marat Dukhande06f492020-04-09 00:19:31 -070022067 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070022068 TEST_REQUIRES_X86_AVX512F;
22069 for (size_t k = 1; k <= 5; k += 2) {
22070 GemmMicrokernelTester()
22071 .mr(5)
22072 .nr(16)
22073 .kr(1)
22074 .sr(1)
22075 .m(5)
22076 .n(16)
22077 .k(k)
22078 .ks(3)
22079 .a_offset(29)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070022080 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070022081 }
22082 }
22083
Marat Dukhande06f492020-04-09 00:19:31 -070022084 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, zero) {
Marat Dukhan1c587112020-04-08 20:04:28 -070022085 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022086 for (size_t k = 1; k <= 5; k += 2) {
22087 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070022088 GemmMicrokernelTester()
22089 .mr(5)
22090 .nr(16)
22091 .kr(1)
22092 .sr(1)
22093 .m(5)
22094 .n(16)
22095 .k(k)
22096 .ks(3)
22097 .a_offset(29)
22098 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070022099 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070022100 }
22101 }
22102 }
22103
Marat Dukhande06f492020-04-09 00:19:31 -070022104 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070022105 TEST_REQUIRES_X86_AVX512F;
22106 GemmMicrokernelTester()
22107 .mr(5)
22108 .nr(16)
22109 .kr(1)
22110 .sr(1)
22111 .m(5)
22112 .n(16)
22113 .k(1)
22114 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070022115 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070022116 }
22117
Marat Dukhande06f492020-04-09 00:19:31 -070022118 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070022119 TEST_REQUIRES_X86_AVX512F;
22120 GemmMicrokernelTester()
22121 .mr(5)
22122 .nr(16)
22123 .kr(1)
22124 .sr(1)
22125 .m(5)
22126 .n(16)
22127 .k(1)
22128 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070022129 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070022130 }
22131
Marat Dukhande06f492020-04-09 00:19:31 -070022132 TEST(F32_IGEMM_MINMAX_5X16__AVX512F_BROADCAST, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070022133 TEST_REQUIRES_X86_AVX512F;
22134 GemmMicrokernelTester()
22135 .mr(5)
22136 .nr(16)
22137 .kr(1)
22138 .sr(1)
22139 .m(5)
22140 .n(16)
22141 .k(1)
22142 .cm_stride(19)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070022143 .Test(xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070022144 }
22145#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22146
22147
Marat Dukhan4c617792021-12-21 15:47:58 -080022148#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080022149 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022150 GemmMicrokernelTester()
22151 .mr(1)
22152 .nr(8)
22153 .kr(1)
22154 .sr(1)
22155 .m(1)
22156 .n(8)
22157 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022158 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022159 }
22160
Frank Barchard0725b8d2020-12-07 11:07:35 -080022161 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022162 GemmMicrokernelTester()
22163 .mr(1)
22164 .nr(8)
22165 .kr(1)
22166 .sr(1)
22167 .m(1)
22168 .n(8)
22169 .k(1)
22170 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022171 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022172 }
22173
Frank Barchard0725b8d2020-12-07 11:07:35 -080022174 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022175 for (uint32_t n = 1; n <= 8; n++) {
22176 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022177 GemmMicrokernelTester()
22178 .mr(1)
22179 .nr(8)
22180 .kr(1)
22181 .sr(1)
22182 .m(m)
22183 .n(n)
22184 .k(1)
22185 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022186 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022187 }
22188 }
22189 }
22190
Frank Barchard0725b8d2020-12-07 11:07:35 -080022191 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022192 for (uint32_t m = 1; m <= 1; m++) {
22193 GemmMicrokernelTester()
22194 .mr(1)
22195 .nr(8)
22196 .kr(1)
22197 .sr(1)
22198 .m(m)
22199 .n(8)
22200 .k(1)
22201 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022202 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022203 }
22204 }
22205
Frank Barchard0725b8d2020-12-07 11:07:35 -080022206 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022207 for (uint32_t n = 1; n <= 8; n++) {
22208 GemmMicrokernelTester()
22209 .mr(1)
22210 .nr(8)
22211 .kr(1)
22212 .sr(1)
22213 .m(1)
22214 .n(n)
22215 .k(1)
22216 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022217 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022218 }
22219 }
22220
Frank Barchard0725b8d2020-12-07 11:07:35 -080022221 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022222 for (size_t k = 2; k < 10; k++) {
22223 GemmMicrokernelTester()
22224 .mr(1)
22225 .nr(8)
22226 .kr(1)
22227 .sr(1)
22228 .m(1)
22229 .n(8)
22230 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022231 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022232 }
22233 }
22234
Frank Barchard0725b8d2020-12-07 11:07:35 -080022235 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022236 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022237 for (uint32_t n = 1; n <= 8; n++) {
22238 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022239 GemmMicrokernelTester()
22240 .mr(1)
22241 .nr(8)
22242 .kr(1)
22243 .sr(1)
22244 .m(m)
22245 .n(n)
22246 .k(k)
22247 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022248 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022249 }
22250 }
22251 }
22252 }
22253
Frank Barchard0725b8d2020-12-07 11:07:35 -080022254 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022255 for (uint32_t n = 9; n < 16; n++) {
22256 for (size_t k = 1; k <= 5; k += 2) {
22257 GemmMicrokernelTester()
22258 .mr(1)
22259 .nr(8)
22260 .kr(1)
22261 .sr(1)
22262 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022263 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022264 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022265 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022266 }
22267 }
22268 }
22269
Frank Barchard0725b8d2020-12-07 11:07:35 -080022270 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022271 for (uint32_t n = 9; n < 16; n++) {
22272 for (size_t k = 1; k <= 5; k += 2) {
22273 GemmMicrokernelTester()
22274 .mr(1)
22275 .nr(8)
22276 .kr(1)
22277 .sr(1)
22278 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022279 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022280 .k(k)
22281 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022282 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022283 }
22284 }
22285 }
22286
Frank Barchard0725b8d2020-12-07 11:07:35 -080022287 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022288 for (uint32_t n = 9; n < 16; n++) {
22289 for (size_t k = 1; k <= 5; k += 2) {
22290 for (uint32_t m = 1; m <= 1; m++) {
22291 GemmMicrokernelTester()
22292 .mr(1)
22293 .nr(8)
22294 .kr(1)
22295 .sr(1)
22296 .m(m)
22297 .n(n)
22298 .k(k)
22299 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022300 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022301 }
22302 }
22303 }
22304 }
22305
Frank Barchard0725b8d2020-12-07 11:07:35 -080022306 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022307 for (uint32_t n = 16; n <= 24; n += 8) {
22308 for (size_t k = 1; k <= 5; k += 2) {
22309 GemmMicrokernelTester()
22310 .mr(1)
22311 .nr(8)
22312 .kr(1)
22313 .sr(1)
22314 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022315 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022316 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022317 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022318 }
22319 }
22320 }
22321
Frank Barchard0725b8d2020-12-07 11:07:35 -080022322 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022323 for (uint32_t n = 16; n <= 24; n += 8) {
22324 for (size_t k = 1; k <= 5; k += 2) {
22325 GemmMicrokernelTester()
22326 .mr(1)
22327 .nr(8)
22328 .kr(1)
22329 .sr(1)
22330 .m(1)
22331 .n(n)
22332 .k(k)
22333 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022334 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022335 }
22336 }
22337 }
22338
Frank Barchard0725b8d2020-12-07 11:07:35 -080022339 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022340 for (uint32_t n = 16; n <= 24; n += 8) {
22341 for (size_t k = 1; k <= 5; k += 2) {
22342 for (uint32_t m = 1; m <= 1; m++) {
22343 GemmMicrokernelTester()
22344 .mr(1)
22345 .nr(8)
22346 .kr(1)
22347 .sr(1)
22348 .m(m)
22349 .n(n)
22350 .k(k)
22351 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022352 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022353 }
22354 }
22355 }
22356 }
22357
Frank Barchard0725b8d2020-12-07 11:07:35 -080022358 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022359 for (size_t k = 1; k <= 5; k += 2) {
22360 GemmMicrokernelTester()
22361 .mr(1)
22362 .nr(8)
22363 .kr(1)
22364 .sr(1)
22365 .m(1)
22366 .n(8)
22367 .k(k)
22368 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022369 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022370 }
22371 }
22372
Frank Barchard0725b8d2020-12-07 11:07:35 -080022373 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, small_kernel_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022374 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022375 for (uint32_t n = 1; n <= 8; n++) {
22376 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022377 GemmMicrokernelTester()
22378 .mr(1)
22379 .nr(8)
22380 .kr(1)
22381 .sr(1)
22382 .m(m)
22383 .n(n)
22384 .k(k)
22385 .ks(3)
22386 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022387 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022388 }
22389 }
22390 }
22391 }
22392
Frank Barchard0725b8d2020-12-07 11:07:35 -080022393 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022394 for (uint32_t n = 9; n < 16; n++) {
22395 for (size_t k = 1; k <= 5; k += 2) {
22396 GemmMicrokernelTester()
22397 .mr(1)
22398 .nr(8)
22399 .kr(1)
22400 .sr(1)
22401 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022402 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022403 .k(k)
22404 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022405 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022406 }
22407 }
22408 }
22409
Frank Barchard0725b8d2020-12-07 11:07:35 -080022410 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022411 for (uint32_t n = 16; n <= 24; n += 8) {
22412 for (size_t k = 1; k <= 5; k += 2) {
22413 GemmMicrokernelTester()
22414 .mr(1)
22415 .nr(8)
22416 .kr(1)
22417 .sr(1)
22418 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022419 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022420 .k(k)
22421 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022422 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022423 }
22424 }
22425 }
22426
Frank Barchard0725b8d2020-12-07 11:07:35 -080022427 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022428 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022429 for (uint32_t n = 1; n <= 8; n++) {
22430 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022431 GemmMicrokernelTester()
22432 .mr(1)
22433 .nr(8)
22434 .kr(1)
22435 .sr(1)
22436 .m(m)
22437 .n(n)
22438 .k(k)
22439 .cm_stride(11)
22440 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022441 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022442 }
22443 }
22444 }
22445 }
22446
Frank Barchard0725b8d2020-12-07 11:07:35 -080022447 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, a_offset) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022448 for (size_t k = 1; k <= 5; k += 2) {
22449 GemmMicrokernelTester()
22450 .mr(1)
22451 .nr(8)
22452 .kr(1)
22453 .sr(1)
22454 .m(1)
22455 .n(8)
22456 .k(k)
22457 .ks(3)
22458 .a_offset(7)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022459 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022460 }
22461 }
22462
Frank Barchard0725b8d2020-12-07 11:07:35 -080022463 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022464 for (size_t k = 1; k <= 5; k += 2) {
22465 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022466 GemmMicrokernelTester()
22467 .mr(1)
22468 .nr(8)
22469 .kr(1)
22470 .sr(1)
22471 .m(1)
22472 .n(8)
22473 .k(k)
22474 .ks(3)
22475 .a_offset(7)
22476 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022477 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022478 }
22479 }
22480 }
22481
Frank Barchard0725b8d2020-12-07 11:07:35 -080022482 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022483 GemmMicrokernelTester()
22484 .mr(1)
22485 .nr(8)
22486 .kr(1)
22487 .sr(1)
22488 .m(1)
22489 .n(8)
22490 .k(1)
22491 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022492 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022493 }
22494
Frank Barchard0725b8d2020-12-07 11:07:35 -080022495 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022496 GemmMicrokernelTester()
22497 .mr(1)
22498 .nr(8)
22499 .kr(1)
22500 .sr(1)
22501 .m(1)
22502 .n(8)
22503 .k(1)
22504 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022505 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022506 }
22507
Frank Barchard0725b8d2020-12-07 11:07:35 -080022508 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022509 GemmMicrokernelTester()
22510 .mr(1)
22511 .nr(8)
22512 .kr(1)
22513 .sr(1)
22514 .m(1)
22515 .n(8)
22516 .k(1)
22517 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022518 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022519 }
Marat Dukhan4c617792021-12-21 15:47:58 -080022520#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022521
22522
Marat Dukhan4c617792021-12-21 15:47:58 -080022523#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080022524 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022525 GemmMicrokernelTester()
22526 .mr(3)
22527 .nr(8)
22528 .kr(1)
22529 .sr(1)
22530 .m(3)
22531 .n(8)
22532 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022533 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022534 }
22535
Frank Barchard0725b8d2020-12-07 11:07:35 -080022536 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022537 GemmMicrokernelTester()
22538 .mr(3)
22539 .nr(8)
22540 .kr(1)
22541 .sr(1)
22542 .m(3)
22543 .n(8)
22544 .k(1)
22545 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022546 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022547 }
22548
Frank Barchard0725b8d2020-12-07 11:07:35 -080022549 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022550 for (uint32_t n = 1; n <= 8; n++) {
22551 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022552 GemmMicrokernelTester()
22553 .mr(3)
22554 .nr(8)
22555 .kr(1)
22556 .sr(1)
22557 .m(m)
22558 .n(n)
22559 .k(1)
22560 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022561 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022562 }
22563 }
22564 }
22565
Frank Barchard0725b8d2020-12-07 11:07:35 -080022566 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022567 for (uint32_t m = 1; m <= 3; m++) {
22568 GemmMicrokernelTester()
22569 .mr(3)
22570 .nr(8)
22571 .kr(1)
22572 .sr(1)
22573 .m(m)
22574 .n(8)
22575 .k(1)
22576 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022577 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022578 }
22579 }
22580
Frank Barchard0725b8d2020-12-07 11:07:35 -080022581 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022582 for (uint32_t n = 1; n <= 8; n++) {
22583 GemmMicrokernelTester()
22584 .mr(3)
22585 .nr(8)
22586 .kr(1)
22587 .sr(1)
22588 .m(3)
22589 .n(n)
22590 .k(1)
22591 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022592 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022593 }
22594 }
22595
Frank Barchard0725b8d2020-12-07 11:07:35 -080022596 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022597 for (size_t k = 2; k < 10; k++) {
22598 GemmMicrokernelTester()
22599 .mr(3)
22600 .nr(8)
22601 .kr(1)
22602 .sr(1)
22603 .m(3)
22604 .n(8)
22605 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022606 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022607 }
22608 }
22609
Frank Barchard0725b8d2020-12-07 11:07:35 -080022610 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022611 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022612 for (uint32_t n = 1; n <= 8; n++) {
22613 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022614 GemmMicrokernelTester()
22615 .mr(3)
22616 .nr(8)
22617 .kr(1)
22618 .sr(1)
22619 .m(m)
22620 .n(n)
22621 .k(k)
22622 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022623 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022624 }
22625 }
22626 }
22627 }
22628
Frank Barchard0725b8d2020-12-07 11:07:35 -080022629 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022630 for (uint32_t n = 9; n < 16; n++) {
22631 for (size_t k = 1; k <= 5; k += 2) {
22632 GemmMicrokernelTester()
22633 .mr(3)
22634 .nr(8)
22635 .kr(1)
22636 .sr(1)
22637 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022638 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022639 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022640 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022641 }
22642 }
22643 }
22644
Frank Barchard0725b8d2020-12-07 11:07:35 -080022645 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022646 for (uint32_t n = 9; n < 16; n++) {
22647 for (size_t k = 1; k <= 5; k += 2) {
22648 GemmMicrokernelTester()
22649 .mr(3)
22650 .nr(8)
22651 .kr(1)
22652 .sr(1)
22653 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022654 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022655 .k(k)
22656 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022657 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022658 }
22659 }
22660 }
22661
Frank Barchard0725b8d2020-12-07 11:07:35 -080022662 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022663 for (uint32_t n = 9; n < 16; n++) {
22664 for (size_t k = 1; k <= 5; k += 2) {
22665 for (uint32_t m = 1; m <= 3; m++) {
22666 GemmMicrokernelTester()
22667 .mr(3)
22668 .nr(8)
22669 .kr(1)
22670 .sr(1)
22671 .m(m)
22672 .n(n)
22673 .k(k)
22674 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022675 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022676 }
22677 }
22678 }
22679 }
22680
Frank Barchard0725b8d2020-12-07 11:07:35 -080022681 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022682 for (uint32_t n = 16; n <= 24; n += 8) {
22683 for (size_t k = 1; k <= 5; k += 2) {
22684 GemmMicrokernelTester()
22685 .mr(3)
22686 .nr(8)
22687 .kr(1)
22688 .sr(1)
22689 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022690 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022691 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022692 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022693 }
22694 }
22695 }
22696
Frank Barchard0725b8d2020-12-07 11:07:35 -080022697 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022698 for (uint32_t n = 16; n <= 24; n += 8) {
22699 for (size_t k = 1; k <= 5; k += 2) {
22700 GemmMicrokernelTester()
22701 .mr(3)
22702 .nr(8)
22703 .kr(1)
22704 .sr(1)
22705 .m(3)
22706 .n(n)
22707 .k(k)
22708 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022709 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022710 }
22711 }
22712 }
22713
Frank Barchard0725b8d2020-12-07 11:07:35 -080022714 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022715 for (uint32_t n = 16; n <= 24; n += 8) {
22716 for (size_t k = 1; k <= 5; k += 2) {
22717 for (uint32_t m = 1; m <= 3; m++) {
22718 GemmMicrokernelTester()
22719 .mr(3)
22720 .nr(8)
22721 .kr(1)
22722 .sr(1)
22723 .m(m)
22724 .n(n)
22725 .k(k)
22726 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022727 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022728 }
22729 }
22730 }
22731 }
22732
Frank Barchard0725b8d2020-12-07 11:07:35 -080022733 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022734 for (size_t k = 1; k <= 5; k += 2) {
22735 GemmMicrokernelTester()
22736 .mr(3)
22737 .nr(8)
22738 .kr(1)
22739 .sr(1)
22740 .m(3)
22741 .n(8)
22742 .k(k)
22743 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022744 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022745 }
22746 }
22747
Frank Barchard0725b8d2020-12-07 11:07:35 -080022748 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, small_kernel_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022749 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022750 for (uint32_t n = 1; n <= 8; n++) {
22751 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022752 GemmMicrokernelTester()
22753 .mr(3)
22754 .nr(8)
22755 .kr(1)
22756 .sr(1)
22757 .m(m)
22758 .n(n)
22759 .k(k)
22760 .ks(3)
22761 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022762 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022763 }
22764 }
22765 }
22766 }
22767
Frank Barchard0725b8d2020-12-07 11:07:35 -080022768 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022769 for (uint32_t n = 9; n < 16; n++) {
22770 for (size_t k = 1; k <= 5; k += 2) {
22771 GemmMicrokernelTester()
22772 .mr(3)
22773 .nr(8)
22774 .kr(1)
22775 .sr(1)
22776 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022777 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022778 .k(k)
22779 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022780 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022781 }
22782 }
22783 }
22784
Frank Barchard0725b8d2020-12-07 11:07:35 -080022785 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022786 for (uint32_t n = 16; n <= 24; n += 8) {
22787 for (size_t k = 1; k <= 5; k += 2) {
22788 GemmMicrokernelTester()
22789 .mr(3)
22790 .nr(8)
22791 .kr(1)
22792 .sr(1)
22793 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022794 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022795 .k(k)
22796 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022797 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022798 }
22799 }
22800 }
22801
Frank Barchard0725b8d2020-12-07 11:07:35 -080022802 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022803 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022804 for (uint32_t n = 1; n <= 8; n++) {
22805 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022806 GemmMicrokernelTester()
22807 .mr(3)
22808 .nr(8)
22809 .kr(1)
22810 .sr(1)
22811 .m(m)
22812 .n(n)
22813 .k(k)
22814 .cm_stride(11)
22815 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022816 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022817 }
22818 }
22819 }
22820 }
22821
Frank Barchard0725b8d2020-12-07 11:07:35 -080022822 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, a_offset) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022823 for (size_t k = 1; k <= 5; k += 2) {
22824 GemmMicrokernelTester()
22825 .mr(3)
22826 .nr(8)
22827 .kr(1)
22828 .sr(1)
22829 .m(3)
22830 .n(8)
22831 .k(k)
22832 .ks(3)
22833 .a_offset(17)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022834 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022835 }
22836 }
22837
Frank Barchard0725b8d2020-12-07 11:07:35 -080022838 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022839 for (size_t k = 1; k <= 5; k += 2) {
22840 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022841 GemmMicrokernelTester()
22842 .mr(3)
22843 .nr(8)
22844 .kr(1)
22845 .sr(1)
22846 .m(3)
22847 .n(8)
22848 .k(k)
22849 .ks(3)
22850 .a_offset(17)
22851 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022852 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022853 }
22854 }
22855 }
22856
Frank Barchard0725b8d2020-12-07 11:07:35 -080022857 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022858 GemmMicrokernelTester()
22859 .mr(3)
22860 .nr(8)
22861 .kr(1)
22862 .sr(1)
22863 .m(3)
22864 .n(8)
22865 .k(1)
22866 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022867 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022868 }
22869
Frank Barchard0725b8d2020-12-07 11:07:35 -080022870 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022871 GemmMicrokernelTester()
22872 .mr(3)
22873 .nr(8)
22874 .kr(1)
22875 .sr(1)
22876 .m(3)
22877 .n(8)
22878 .k(1)
22879 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022880 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022881 }
22882
Frank Barchard0725b8d2020-12-07 11:07:35 -080022883 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022884 GemmMicrokernelTester()
22885 .mr(3)
22886 .nr(8)
22887 .kr(1)
22888 .sr(1)
22889 .m(3)
22890 .n(8)
22891 .k(1)
22892 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022893 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022894 }
Marat Dukhan4c617792021-12-21 15:47:58 -080022895#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022896
22897
Marat Dukhan4c617792021-12-21 15:47:58 -080022898#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080022899 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022900 GemmMicrokernelTester()
22901 .mr(4)
22902 .nr(8)
22903 .kr(1)
22904 .sr(1)
22905 .m(4)
22906 .n(8)
22907 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022908 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022909 }
22910
Frank Barchard0725b8d2020-12-07 11:07:35 -080022911 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022912 GemmMicrokernelTester()
22913 .mr(4)
22914 .nr(8)
22915 .kr(1)
22916 .sr(1)
22917 .m(4)
22918 .n(8)
22919 .k(1)
22920 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022921 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022922 }
22923
Frank Barchard0725b8d2020-12-07 11:07:35 -080022924 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022925 for (uint32_t n = 1; n <= 8; n++) {
22926 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022927 GemmMicrokernelTester()
22928 .mr(4)
22929 .nr(8)
22930 .kr(1)
22931 .sr(1)
22932 .m(m)
22933 .n(n)
22934 .k(1)
22935 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022936 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022937 }
22938 }
22939 }
22940
Frank Barchard0725b8d2020-12-07 11:07:35 -080022941 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022942 for (uint32_t m = 1; m <= 4; m++) {
22943 GemmMicrokernelTester()
22944 .mr(4)
22945 .nr(8)
22946 .kr(1)
22947 .sr(1)
22948 .m(m)
22949 .n(8)
22950 .k(1)
22951 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022952 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022953 }
22954 }
22955
Frank Barchard0725b8d2020-12-07 11:07:35 -080022956 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022957 for (uint32_t n = 1; n <= 8; n++) {
22958 GemmMicrokernelTester()
22959 .mr(4)
22960 .nr(8)
22961 .kr(1)
22962 .sr(1)
22963 .m(4)
22964 .n(n)
22965 .k(1)
22966 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022967 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022968 }
22969 }
22970
Frank Barchard0725b8d2020-12-07 11:07:35 -080022971 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022972 for (size_t k = 2; k < 10; k++) {
22973 GemmMicrokernelTester()
22974 .mr(4)
22975 .nr(8)
22976 .kr(1)
22977 .sr(1)
22978 .m(4)
22979 .n(8)
22980 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022981 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022982 }
22983 }
22984
Frank Barchard0725b8d2020-12-07 11:07:35 -080022985 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022986 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022987 for (uint32_t n = 1; n <= 8; n++) {
22988 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022989 GemmMicrokernelTester()
22990 .mr(4)
22991 .nr(8)
22992 .kr(1)
22993 .sr(1)
22994 .m(m)
22995 .n(n)
22996 .k(k)
22997 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080022998 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070022999 }
23000 }
23001 }
23002 }
23003
Frank Barchard0725b8d2020-12-07 11:07:35 -080023004 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023005 for (uint32_t n = 9; n < 16; n++) {
23006 for (size_t k = 1; k <= 5; k += 2) {
23007 GemmMicrokernelTester()
23008 .mr(4)
23009 .nr(8)
23010 .kr(1)
23011 .sr(1)
23012 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023013 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023014 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023015 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023016 }
23017 }
23018 }
23019
Frank Barchard0725b8d2020-12-07 11:07:35 -080023020 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023021 for (uint32_t n = 9; n < 16; n++) {
23022 for (size_t k = 1; k <= 5; k += 2) {
23023 GemmMicrokernelTester()
23024 .mr(4)
23025 .nr(8)
23026 .kr(1)
23027 .sr(1)
23028 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023029 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023030 .k(k)
23031 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023032 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023033 }
23034 }
23035 }
23036
Frank Barchard0725b8d2020-12-07 11:07:35 -080023037 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023038 for (uint32_t n = 9; n < 16; n++) {
23039 for (size_t k = 1; k <= 5; k += 2) {
23040 for (uint32_t m = 1; m <= 4; m++) {
23041 GemmMicrokernelTester()
23042 .mr(4)
23043 .nr(8)
23044 .kr(1)
23045 .sr(1)
23046 .m(m)
23047 .n(n)
23048 .k(k)
23049 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023050 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023051 }
23052 }
23053 }
23054 }
23055
Frank Barchard0725b8d2020-12-07 11:07:35 -080023056 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023057 for (uint32_t n = 16; n <= 24; n += 8) {
23058 for (size_t k = 1; k <= 5; k += 2) {
23059 GemmMicrokernelTester()
23060 .mr(4)
23061 .nr(8)
23062 .kr(1)
23063 .sr(1)
23064 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023065 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023066 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023067 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023068 }
23069 }
23070 }
23071
Frank Barchard0725b8d2020-12-07 11:07:35 -080023072 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023073 for (uint32_t n = 16; n <= 24; n += 8) {
23074 for (size_t k = 1; k <= 5; k += 2) {
23075 GemmMicrokernelTester()
23076 .mr(4)
23077 .nr(8)
23078 .kr(1)
23079 .sr(1)
23080 .m(4)
23081 .n(n)
23082 .k(k)
23083 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023084 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023085 }
23086 }
23087 }
23088
Frank Barchard0725b8d2020-12-07 11:07:35 -080023089 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023090 for (uint32_t n = 16; n <= 24; n += 8) {
23091 for (size_t k = 1; k <= 5; k += 2) {
23092 for (uint32_t m = 1; m <= 4; m++) {
23093 GemmMicrokernelTester()
23094 .mr(4)
23095 .nr(8)
23096 .kr(1)
23097 .sr(1)
23098 .m(m)
23099 .n(n)
23100 .k(k)
23101 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023102 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023103 }
23104 }
23105 }
23106 }
23107
Frank Barchard0725b8d2020-12-07 11:07:35 -080023108 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023109 for (size_t k = 1; k <= 5; k += 2) {
23110 GemmMicrokernelTester()
23111 .mr(4)
23112 .nr(8)
23113 .kr(1)
23114 .sr(1)
23115 .m(4)
23116 .n(8)
23117 .k(k)
23118 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023119 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023120 }
23121 }
23122
Frank Barchard0725b8d2020-12-07 11:07:35 -080023123 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, small_kernel_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023124 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023125 for (uint32_t n = 1; n <= 8; n++) {
23126 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023127 GemmMicrokernelTester()
23128 .mr(4)
23129 .nr(8)
23130 .kr(1)
23131 .sr(1)
23132 .m(m)
23133 .n(n)
23134 .k(k)
23135 .ks(3)
23136 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023137 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023138 }
23139 }
23140 }
23141 }
23142
Frank Barchard0725b8d2020-12-07 11:07:35 -080023143 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023144 for (uint32_t n = 9; n < 16; n++) {
23145 for (size_t k = 1; k <= 5; k += 2) {
23146 GemmMicrokernelTester()
23147 .mr(4)
23148 .nr(8)
23149 .kr(1)
23150 .sr(1)
23151 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023152 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023153 .k(k)
23154 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023155 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023156 }
23157 }
23158 }
23159
Frank Barchard0725b8d2020-12-07 11:07:35 -080023160 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023161 for (uint32_t n = 16; n <= 24; n += 8) {
23162 for (size_t k = 1; k <= 5; k += 2) {
23163 GemmMicrokernelTester()
23164 .mr(4)
23165 .nr(8)
23166 .kr(1)
23167 .sr(1)
23168 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023169 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023170 .k(k)
23171 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023172 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023173 }
23174 }
23175 }
23176
Frank Barchard0725b8d2020-12-07 11:07:35 -080023177 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023178 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023179 for (uint32_t n = 1; n <= 8; n++) {
23180 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023181 GemmMicrokernelTester()
23182 .mr(4)
23183 .nr(8)
23184 .kr(1)
23185 .sr(1)
23186 .m(m)
23187 .n(n)
23188 .k(k)
23189 .cm_stride(11)
23190 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023191 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023192 }
23193 }
23194 }
23195 }
23196
Frank Barchard0725b8d2020-12-07 11:07:35 -080023197 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, a_offset) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023198 for (size_t k = 1; k <= 5; k += 2) {
23199 GemmMicrokernelTester()
23200 .mr(4)
23201 .nr(8)
23202 .kr(1)
23203 .sr(1)
23204 .m(4)
23205 .n(8)
23206 .k(k)
23207 .ks(3)
23208 .a_offset(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023209 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023210 }
23211 }
23212
Frank Barchard0725b8d2020-12-07 11:07:35 -080023213 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023214 for (size_t k = 1; k <= 5; k += 2) {
23215 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023216 GemmMicrokernelTester()
23217 .mr(4)
23218 .nr(8)
23219 .kr(1)
23220 .sr(1)
23221 .m(4)
23222 .n(8)
23223 .k(k)
23224 .ks(3)
23225 .a_offset(23)
23226 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023227 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023228 }
23229 }
23230 }
23231
Frank Barchard0725b8d2020-12-07 11:07:35 -080023232 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023233 GemmMicrokernelTester()
23234 .mr(4)
23235 .nr(8)
23236 .kr(1)
23237 .sr(1)
23238 .m(4)
23239 .n(8)
23240 .k(1)
23241 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023242 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023243 }
23244
Frank Barchard0725b8d2020-12-07 11:07:35 -080023245 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023246 GemmMicrokernelTester()
23247 .mr(4)
23248 .nr(8)
23249 .kr(1)
23250 .sr(1)
23251 .m(4)
23252 .n(8)
23253 .k(1)
23254 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023255 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023256 }
23257
Frank Barchard0725b8d2020-12-07 11:07:35 -080023258 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023259 GemmMicrokernelTester()
23260 .mr(4)
23261 .nr(8)
23262 .kr(1)
23263 .sr(1)
23264 .m(4)
23265 .n(8)
23266 .k(1)
23267 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023268 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023269 }
Marat Dukhan4c617792021-12-21 15:47:58 -080023270#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023271
23272
Marat Dukhan4c617792021-12-21 15:47:58 -080023273#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080023274 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023275 GemmMicrokernelTester()
23276 .mr(6)
23277 .nr(8)
23278 .kr(1)
23279 .sr(1)
23280 .m(6)
23281 .n(8)
23282 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023283 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023284 }
23285
Frank Barchard0725b8d2020-12-07 11:07:35 -080023286 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023287 GemmMicrokernelTester()
23288 .mr(6)
23289 .nr(8)
23290 .kr(1)
23291 .sr(1)
23292 .m(6)
23293 .n(8)
23294 .k(1)
23295 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023296 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023297 }
23298
Frank Barchard0725b8d2020-12-07 11:07:35 -080023299 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023300 for (uint32_t n = 1; n <= 8; n++) {
23301 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023302 GemmMicrokernelTester()
23303 .mr(6)
23304 .nr(8)
23305 .kr(1)
23306 .sr(1)
23307 .m(m)
23308 .n(n)
23309 .k(1)
23310 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023311 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023312 }
23313 }
23314 }
23315
Frank Barchard0725b8d2020-12-07 11:07:35 -080023316 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023317 for (uint32_t m = 1; m <= 6; m++) {
23318 GemmMicrokernelTester()
23319 .mr(6)
23320 .nr(8)
23321 .kr(1)
23322 .sr(1)
23323 .m(m)
23324 .n(8)
23325 .k(1)
23326 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023327 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023328 }
23329 }
23330
Frank Barchard0725b8d2020-12-07 11:07:35 -080023331 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023332 for (uint32_t n = 1; n <= 8; n++) {
23333 GemmMicrokernelTester()
23334 .mr(6)
23335 .nr(8)
23336 .kr(1)
23337 .sr(1)
23338 .m(6)
23339 .n(n)
23340 .k(1)
23341 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023342 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023343 }
23344 }
23345
Frank Barchard0725b8d2020-12-07 11:07:35 -080023346 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023347 for (size_t k = 2; k < 10; k++) {
23348 GemmMicrokernelTester()
23349 .mr(6)
23350 .nr(8)
23351 .kr(1)
23352 .sr(1)
23353 .m(6)
23354 .n(8)
23355 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023356 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023357 }
23358 }
23359
Frank Barchard0725b8d2020-12-07 11:07:35 -080023360 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023361 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023362 for (uint32_t n = 1; n <= 8; n++) {
23363 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023364 GemmMicrokernelTester()
23365 .mr(6)
23366 .nr(8)
23367 .kr(1)
23368 .sr(1)
23369 .m(m)
23370 .n(n)
23371 .k(k)
23372 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023373 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023374 }
23375 }
23376 }
23377 }
23378
Frank Barchard0725b8d2020-12-07 11:07:35 -080023379 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023380 for (uint32_t n = 9; n < 16; n++) {
23381 for (size_t k = 1; k <= 5; k += 2) {
23382 GemmMicrokernelTester()
23383 .mr(6)
23384 .nr(8)
23385 .kr(1)
23386 .sr(1)
23387 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023388 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023389 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023390 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023391 }
23392 }
23393 }
23394
Frank Barchard0725b8d2020-12-07 11:07:35 -080023395 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023396 for (uint32_t n = 9; n < 16; n++) {
23397 for (size_t k = 1; k <= 5; k += 2) {
23398 GemmMicrokernelTester()
23399 .mr(6)
23400 .nr(8)
23401 .kr(1)
23402 .sr(1)
23403 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023404 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023405 .k(k)
23406 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023407 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023408 }
23409 }
23410 }
23411
Frank Barchard0725b8d2020-12-07 11:07:35 -080023412 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023413 for (uint32_t n = 9; n < 16; n++) {
23414 for (size_t k = 1; k <= 5; k += 2) {
23415 for (uint32_t m = 1; m <= 6; m++) {
23416 GemmMicrokernelTester()
23417 .mr(6)
23418 .nr(8)
23419 .kr(1)
23420 .sr(1)
23421 .m(m)
23422 .n(n)
23423 .k(k)
23424 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023425 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023426 }
23427 }
23428 }
23429 }
23430
Frank Barchard0725b8d2020-12-07 11:07:35 -080023431 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023432 for (uint32_t n = 16; n <= 24; n += 8) {
23433 for (size_t k = 1; k <= 5; k += 2) {
23434 GemmMicrokernelTester()
23435 .mr(6)
23436 .nr(8)
23437 .kr(1)
23438 .sr(1)
23439 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023440 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023441 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023442 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023443 }
23444 }
23445 }
23446
Frank Barchard0725b8d2020-12-07 11:07:35 -080023447 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023448 for (uint32_t n = 16; n <= 24; n += 8) {
23449 for (size_t k = 1; k <= 5; k += 2) {
23450 GemmMicrokernelTester()
23451 .mr(6)
23452 .nr(8)
23453 .kr(1)
23454 .sr(1)
23455 .m(6)
23456 .n(n)
23457 .k(k)
23458 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023459 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023460 }
23461 }
23462 }
23463
Frank Barchard0725b8d2020-12-07 11:07:35 -080023464 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023465 for (uint32_t n = 16; n <= 24; n += 8) {
23466 for (size_t k = 1; k <= 5; k += 2) {
23467 for (uint32_t m = 1; m <= 6; m++) {
23468 GemmMicrokernelTester()
23469 .mr(6)
23470 .nr(8)
23471 .kr(1)
23472 .sr(1)
23473 .m(m)
23474 .n(n)
23475 .k(k)
23476 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023477 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023478 }
23479 }
23480 }
23481 }
23482
Frank Barchard0725b8d2020-12-07 11:07:35 -080023483 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023484 for (size_t k = 1; k <= 5; k += 2) {
23485 GemmMicrokernelTester()
23486 .mr(6)
23487 .nr(8)
23488 .kr(1)
23489 .sr(1)
23490 .m(6)
23491 .n(8)
23492 .k(k)
23493 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023494 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023495 }
23496 }
23497
Frank Barchard0725b8d2020-12-07 11:07:35 -080023498 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, small_kernel_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023499 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023500 for (uint32_t n = 1; n <= 8; n++) {
23501 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023502 GemmMicrokernelTester()
23503 .mr(6)
23504 .nr(8)
23505 .kr(1)
23506 .sr(1)
23507 .m(m)
23508 .n(n)
23509 .k(k)
23510 .ks(3)
23511 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023512 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023513 }
23514 }
23515 }
23516 }
23517
Frank Barchard0725b8d2020-12-07 11:07:35 -080023518 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023519 for (uint32_t n = 9; n < 16; n++) {
23520 for (size_t k = 1; k <= 5; k += 2) {
23521 GemmMicrokernelTester()
23522 .mr(6)
23523 .nr(8)
23524 .kr(1)
23525 .sr(1)
23526 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023527 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023528 .k(k)
23529 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023530 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023531 }
23532 }
23533 }
23534
Frank Barchard0725b8d2020-12-07 11:07:35 -080023535 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023536 for (uint32_t n = 16; n <= 24; n += 8) {
23537 for (size_t k = 1; k <= 5; k += 2) {
23538 GemmMicrokernelTester()
23539 .mr(6)
23540 .nr(8)
23541 .kr(1)
23542 .sr(1)
23543 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023544 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023545 .k(k)
23546 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023547 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023548 }
23549 }
23550 }
23551
Frank Barchard0725b8d2020-12-07 11:07:35 -080023552 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023553 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023554 for (uint32_t n = 1; n <= 8; n++) {
23555 for (uint32_t m = 1; m <= 6; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023556 GemmMicrokernelTester()
23557 .mr(6)
23558 .nr(8)
23559 .kr(1)
23560 .sr(1)
23561 .m(m)
23562 .n(n)
23563 .k(k)
23564 .cm_stride(11)
23565 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023566 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023567 }
23568 }
23569 }
23570 }
23571
Frank Barchard0725b8d2020-12-07 11:07:35 -080023572 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, a_offset) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023573 for (size_t k = 1; k <= 5; k += 2) {
23574 GemmMicrokernelTester()
23575 .mr(6)
23576 .nr(8)
23577 .kr(1)
23578 .sr(1)
23579 .m(6)
23580 .n(8)
23581 .k(k)
23582 .ks(3)
23583 .a_offset(37)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023584 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023585 }
23586 }
23587
Frank Barchard0725b8d2020-12-07 11:07:35 -080023588 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023589 for (size_t k = 1; k <= 5; k += 2) {
23590 for (uint32_t mz = 0; mz < 6; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023591 GemmMicrokernelTester()
23592 .mr(6)
23593 .nr(8)
23594 .kr(1)
23595 .sr(1)
23596 .m(6)
23597 .n(8)
23598 .k(k)
23599 .ks(3)
23600 .a_offset(37)
23601 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023602 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023603 }
23604 }
23605 }
23606
Frank Barchard0725b8d2020-12-07 11:07:35 -080023607 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023608 GemmMicrokernelTester()
23609 .mr(6)
23610 .nr(8)
23611 .kr(1)
23612 .sr(1)
23613 .m(6)
23614 .n(8)
23615 .k(1)
23616 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023617 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023618 }
23619
Frank Barchard0725b8d2020-12-07 11:07:35 -080023620 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023621 GemmMicrokernelTester()
23622 .mr(6)
23623 .nr(8)
23624 .kr(1)
23625 .sr(1)
23626 .m(6)
23627 .n(8)
23628 .k(1)
23629 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023630 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023631 }
23632
Frank Barchard0725b8d2020-12-07 11:07:35 -080023633 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023634 GemmMicrokernelTester()
23635 .mr(6)
23636 .nr(8)
23637 .kr(1)
23638 .sr(1)
23639 .m(6)
23640 .n(8)
23641 .k(1)
23642 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023643 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023644 }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080023649 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023650 GemmMicrokernelTester()
23651 .mr(5)
23652 .nr(8)
23653 .kr(1)
23654 .sr(1)
23655 .m(5)
23656 .n(8)
23657 .k(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023658 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023659 }
23660
Frank Barchard0725b8d2020-12-07 11:07:35 -080023661 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023662 GemmMicrokernelTester()
23663 .mr(5)
23664 .nr(8)
23665 .kr(1)
23666 .sr(1)
23667 .m(5)
23668 .n(8)
23669 .k(1)
23670 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023671 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023672 }
23673
Frank Barchard0725b8d2020-12-07 11:07:35 -080023674 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023675 for (uint32_t n = 1; n <= 8; n++) {
23676 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023677 GemmMicrokernelTester()
23678 .mr(5)
23679 .nr(8)
23680 .kr(1)
23681 .sr(1)
23682 .m(m)
23683 .n(n)
23684 .k(1)
23685 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023686 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023687 }
23688 }
23689 }
23690
Frank Barchard0725b8d2020-12-07 11:07:35 -080023691 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023692 for (uint32_t m = 1; m <= 5; m++) {
23693 GemmMicrokernelTester()
23694 .mr(5)
23695 .nr(8)
23696 .kr(1)
23697 .sr(1)
23698 .m(m)
23699 .n(8)
23700 .k(1)
23701 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023702 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023703 }
23704 }
23705
Frank Barchard0725b8d2020-12-07 11:07:35 -080023706 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023707 for (uint32_t n = 1; n <= 8; n++) {
23708 GemmMicrokernelTester()
23709 .mr(5)
23710 .nr(8)
23711 .kr(1)
23712 .sr(1)
23713 .m(5)
23714 .n(n)
23715 .k(1)
23716 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023717 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023718 }
23719 }
23720
Frank Barchard0725b8d2020-12-07 11:07:35 -080023721 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023722 for (size_t k = 2; k < 10; k++) {
23723 GemmMicrokernelTester()
23724 .mr(5)
23725 .nr(8)
23726 .kr(1)
23727 .sr(1)
23728 .m(5)
23729 .n(8)
23730 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023731 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023732 }
23733 }
23734
Frank Barchard0725b8d2020-12-07 11:07:35 -080023735 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023736 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023737 for (uint32_t n = 1; n <= 8; n++) {
23738 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023739 GemmMicrokernelTester()
23740 .mr(5)
23741 .nr(8)
23742 .kr(1)
23743 .sr(1)
23744 .m(m)
23745 .n(n)
23746 .k(k)
23747 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023748 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023749 }
23750 }
23751 }
23752 }
23753
Frank Barchard0725b8d2020-12-07 11:07:35 -080023754 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023755 for (uint32_t n = 9; n < 16; n++) {
23756 for (size_t k = 1; k <= 5; k += 2) {
23757 GemmMicrokernelTester()
23758 .mr(5)
23759 .nr(8)
23760 .kr(1)
23761 .sr(1)
23762 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023763 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023764 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023765 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023766 }
23767 }
23768 }
23769
Frank Barchard0725b8d2020-12-07 11:07:35 -080023770 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023771 for (uint32_t n = 9; n < 16; n++) {
23772 for (size_t k = 1; k <= 5; k += 2) {
23773 GemmMicrokernelTester()
23774 .mr(5)
23775 .nr(8)
23776 .kr(1)
23777 .sr(1)
23778 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023779 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023780 .k(k)
23781 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023782 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023783 }
23784 }
23785 }
23786
Frank Barchard0725b8d2020-12-07 11:07:35 -080023787 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023788 for (uint32_t n = 9; n < 16; n++) {
23789 for (size_t k = 1; k <= 5; k += 2) {
23790 for (uint32_t m = 1; m <= 5; m++) {
23791 GemmMicrokernelTester()
23792 .mr(5)
23793 .nr(8)
23794 .kr(1)
23795 .sr(1)
23796 .m(m)
23797 .n(n)
23798 .k(k)
23799 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023800 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023801 }
23802 }
23803 }
23804 }
23805
Frank Barchard0725b8d2020-12-07 11:07:35 -080023806 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023807 for (uint32_t n = 16; n <= 24; n += 8) {
23808 for (size_t k = 1; k <= 5; k += 2) {
23809 GemmMicrokernelTester()
23810 .mr(5)
23811 .nr(8)
23812 .kr(1)
23813 .sr(1)
23814 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023815 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023816 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023817 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023818 }
23819 }
23820 }
23821
Frank Barchard0725b8d2020-12-07 11:07:35 -080023822 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023823 for (uint32_t n = 16; n <= 24; n += 8) {
23824 for (size_t k = 1; k <= 5; k += 2) {
23825 GemmMicrokernelTester()
23826 .mr(5)
23827 .nr(8)
23828 .kr(1)
23829 .sr(1)
23830 .m(5)
23831 .n(n)
23832 .k(k)
23833 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023834 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023835 }
23836 }
23837 }
23838
Frank Barchard0725b8d2020-12-07 11:07:35 -080023839 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023840 for (uint32_t n = 16; n <= 24; n += 8) {
23841 for (size_t k = 1; k <= 5; k += 2) {
23842 for (uint32_t m = 1; m <= 5; m++) {
23843 GemmMicrokernelTester()
23844 .mr(5)
23845 .nr(8)
23846 .kr(1)
23847 .sr(1)
23848 .m(m)
23849 .n(n)
23850 .k(k)
23851 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023852 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023853 }
23854 }
23855 }
23856 }
23857
Frank Barchard0725b8d2020-12-07 11:07:35 -080023858 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023859 for (size_t k = 1; k <= 5; k += 2) {
23860 GemmMicrokernelTester()
23861 .mr(5)
23862 .nr(8)
23863 .kr(1)
23864 .sr(1)
23865 .m(5)
23866 .n(8)
23867 .k(k)
23868 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023869 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023870 }
23871 }
23872
Frank Barchard0725b8d2020-12-07 11:07:35 -080023873 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, small_kernel_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023874 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023875 for (uint32_t n = 1; n <= 8; n++) {
23876 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023877 GemmMicrokernelTester()
23878 .mr(5)
23879 .nr(8)
23880 .kr(1)
23881 .sr(1)
23882 .m(m)
23883 .n(n)
23884 .k(k)
23885 .ks(3)
23886 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023887 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023888 }
23889 }
23890 }
23891 }
23892
Frank Barchard0725b8d2020-12-07 11:07:35 -080023893 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023894 for (uint32_t n = 9; n < 16; n++) {
23895 for (size_t k = 1; k <= 5; k += 2) {
23896 GemmMicrokernelTester()
23897 .mr(5)
23898 .nr(8)
23899 .kr(1)
23900 .sr(1)
23901 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023902 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023903 .k(k)
23904 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023905 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023906 }
23907 }
23908 }
23909
Frank Barchard0725b8d2020-12-07 11:07:35 -080023910 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, n_div_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023911 for (uint32_t n = 16; n <= 24; n += 8) {
23912 for (size_t k = 1; k <= 5; k += 2) {
23913 GemmMicrokernelTester()
23914 .mr(5)
23915 .nr(8)
23916 .kr(1)
23917 .sr(1)
23918 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023919 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023920 .k(k)
23921 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023922 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023923 }
23924 }
23925 }
23926
Frank Barchard0725b8d2020-12-07 11:07:35 -080023927 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023928 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023929 for (uint32_t n = 1; n <= 8; n++) {
23930 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023931 GemmMicrokernelTester()
23932 .mr(5)
23933 .nr(8)
23934 .kr(1)
23935 .sr(1)
23936 .m(m)
23937 .n(n)
23938 .k(k)
23939 .cm_stride(11)
23940 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023941 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023942 }
23943 }
23944 }
23945 }
23946
Frank Barchard0725b8d2020-12-07 11:07:35 -080023947 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, a_offset) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023948 for (size_t k = 1; k <= 5; k += 2) {
23949 GemmMicrokernelTester()
23950 .mr(5)
23951 .nr(8)
23952 .kr(1)
23953 .sr(1)
23954 .m(5)
23955 .n(8)
23956 .k(k)
23957 .ks(3)
23958 .a_offset(29)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023959 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023960 }
23961 }
23962
Frank Barchard0725b8d2020-12-07 11:07:35 -080023963 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023964 for (size_t k = 1; k <= 5; k += 2) {
23965 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023966 GemmMicrokernelTester()
23967 .mr(5)
23968 .nr(8)
23969 .kr(1)
23970 .sr(1)
23971 .m(5)
23972 .n(8)
23973 .k(k)
23974 .ks(3)
23975 .a_offset(29)
23976 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023977 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023978 }
23979 }
23980 }
23981
Frank Barchard0725b8d2020-12-07 11:07:35 -080023982 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023983 GemmMicrokernelTester()
23984 .mr(5)
23985 .nr(8)
23986 .kr(1)
23987 .sr(1)
23988 .m(5)
23989 .n(8)
23990 .k(1)
23991 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080023992 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023993 }
23994
Frank Barchard0725b8d2020-12-07 11:07:35 -080023995 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070023996 GemmMicrokernelTester()
23997 .mr(5)
23998 .nr(8)
23999 .kr(1)
24000 .sr(1)
24001 .m(5)
24002 .n(8)
24003 .k(1)
24004 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024005 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024006 }
24007
Frank Barchard0725b8d2020-12-07 11:07:35 -080024008 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024009 GemmMicrokernelTester()
24010 .mr(5)
24011 .nr(8)
24012 .kr(1)
24013 .sr(1)
24014 .m(5)
24015 .n(8)
24016 .k(1)
24017 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024018 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024019 }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080024024 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024025 GemmMicrokernelTester()
24026 .mr(3)
24027 .nr(8)
24028 .kr(1)
24029 .sr(1)
24030 .m(3)
24031 .n(8)
24032 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024033 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024034 }
24035
Frank Barchard0725b8d2020-12-07 11:07:35 -080024036 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024037 GemmMicrokernelTester()
24038 .mr(3)
24039 .nr(8)
24040 .kr(1)
24041 .sr(1)
24042 .m(3)
24043 .n(8)
24044 .k(4)
24045 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024046 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024047 }
24048
Frank Barchard0725b8d2020-12-07 11:07:35 -080024049 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024050 for (uint32_t n = 1; n <= 8; n++) {
24051 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024052 GemmMicrokernelTester()
24053 .mr(3)
24054 .nr(8)
24055 .kr(1)
24056 .sr(1)
24057 .m(m)
24058 .n(n)
24059 .k(4)
24060 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024061 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024062 }
24063 }
24064 }
24065
Frank Barchard0725b8d2020-12-07 11:07:35 -080024066 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024067 for (uint32_t m = 1; m <= 3; m++) {
24068 GemmMicrokernelTester()
24069 .mr(3)
24070 .nr(8)
24071 .kr(1)
24072 .sr(1)
24073 .m(m)
24074 .n(8)
24075 .k(4)
24076 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024077 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024078 }
24079 }
24080
Frank Barchard0725b8d2020-12-07 11:07:35 -080024081 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024082 for (uint32_t n = 1; n <= 8; n++) {
24083 GemmMicrokernelTester()
24084 .mr(3)
24085 .nr(8)
24086 .kr(1)
24087 .sr(1)
24088 .m(3)
24089 .n(n)
24090 .k(4)
24091 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024092 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024093 }
24094 }
24095
Frank Barchard0725b8d2020-12-07 11:07:35 -080024096 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024097 for (size_t k = 1; k < 4; k++) {
24098 GemmMicrokernelTester()
24099 .mr(3)
24100 .nr(8)
24101 .kr(1)
24102 .sr(1)
24103 .m(3)
24104 .n(8)
24105 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024106 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024107 }
24108 }
24109
Frank Barchard0725b8d2020-12-07 11:07:35 -080024110 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024111 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024112 for (uint32_t n = 1; n <= 8; n++) {
24113 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024114 GemmMicrokernelTester()
24115 .mr(3)
24116 .nr(8)
24117 .kr(1)
24118 .sr(1)
24119 .m(m)
24120 .n(n)
24121 .k(k)
24122 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024123 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024124 }
24125 }
24126 }
24127 }
24128
Frank Barchard0725b8d2020-12-07 11:07:35 -080024129 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024130 for (size_t k = 5; k < 8; k++) {
24131 GemmMicrokernelTester()
24132 .mr(3)
24133 .nr(8)
24134 .kr(1)
24135 .sr(1)
24136 .m(3)
24137 .n(8)
24138 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024139 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024140 }
24141 }
24142
Frank Barchard0725b8d2020-12-07 11:07:35 -080024143 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024144 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024145 for (uint32_t n = 1; n <= 8; n++) {
24146 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024147 GemmMicrokernelTester()
24148 .mr(3)
24149 .nr(8)
24150 .kr(1)
24151 .sr(1)
24152 .m(m)
24153 .n(n)
24154 .k(k)
24155 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024156 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024157 }
24158 }
24159 }
24160 }
24161
Frank Barchard0725b8d2020-12-07 11:07:35 -080024162 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024163 for (size_t k = 8; k <= 40; k += 4) {
24164 GemmMicrokernelTester()
24165 .mr(3)
24166 .nr(8)
24167 .kr(1)
24168 .sr(1)
24169 .m(3)
24170 .n(8)
24171 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024172 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024173 }
24174 }
24175
Frank Barchard0725b8d2020-12-07 11:07:35 -080024176 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024177 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024178 for (uint32_t n = 1; n <= 8; n++) {
24179 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024180 GemmMicrokernelTester()
24181 .mr(3)
24182 .nr(8)
24183 .kr(1)
24184 .sr(1)
24185 .m(m)
24186 .n(n)
24187 .k(k)
24188 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024189 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024190 }
24191 }
24192 }
24193 }
24194
Frank Barchard0725b8d2020-12-07 11:07:35 -080024195 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024196 for (uint32_t n = 9; n < 16; n++) {
24197 for (size_t k = 1; k <= 20; k += 5) {
24198 GemmMicrokernelTester()
24199 .mr(3)
24200 .nr(8)
24201 .kr(1)
24202 .sr(1)
24203 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024204 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024205 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024206 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024207 }
24208 }
24209 }
24210
Frank Barchard0725b8d2020-12-07 11:07:35 -080024211 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024212 for (uint32_t n = 9; n < 16; n++) {
24213 for (size_t k = 1; k <= 20; k += 5) {
24214 GemmMicrokernelTester()
24215 .mr(3)
24216 .nr(8)
24217 .kr(1)
24218 .sr(1)
24219 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024220 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024221 .k(k)
24222 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024223 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024224 }
24225 }
24226 }
24227
Frank Barchard0725b8d2020-12-07 11:07:35 -080024228 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024229 for (uint32_t n = 9; n < 16; n++) {
24230 for (size_t k = 1; k <= 20; k += 5) {
24231 for (uint32_t m = 1; m <= 3; m++) {
24232 GemmMicrokernelTester()
24233 .mr(3)
24234 .nr(8)
24235 .kr(1)
24236 .sr(1)
24237 .m(m)
24238 .n(n)
24239 .k(k)
24240 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024241 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024242 }
24243 }
24244 }
24245 }
24246
Frank Barchard0725b8d2020-12-07 11:07:35 -080024247 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024248 for (uint32_t n = 16; n <= 24; n += 8) {
24249 for (size_t k = 1; k <= 20; k += 5) {
24250 GemmMicrokernelTester()
24251 .mr(3)
24252 .nr(8)
24253 .kr(1)
24254 .sr(1)
24255 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024256 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024257 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024258 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024259 }
24260 }
24261 }
24262
Frank Barchard0725b8d2020-12-07 11:07:35 -080024263 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024264 for (uint32_t n = 16; n <= 24; n += 8) {
24265 for (size_t k = 1; k <= 20; k += 5) {
24266 GemmMicrokernelTester()
24267 .mr(3)
24268 .nr(8)
24269 .kr(1)
24270 .sr(1)
24271 .m(3)
24272 .n(n)
24273 .k(k)
24274 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024275 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024276 }
24277 }
24278 }
24279
Frank Barchard0725b8d2020-12-07 11:07:35 -080024280 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024281 for (uint32_t n = 16; n <= 24; n += 8) {
24282 for (size_t k = 1; k <= 20; k += 5) {
24283 for (uint32_t m = 1; m <= 3; m++) {
24284 GemmMicrokernelTester()
24285 .mr(3)
24286 .nr(8)
24287 .kr(1)
24288 .sr(1)
24289 .m(m)
24290 .n(n)
24291 .k(k)
24292 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024293 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024294 }
24295 }
24296 }
24297 }
24298
Frank Barchard0725b8d2020-12-07 11:07:35 -080024299 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024300 for (size_t k = 1; k <= 20; k += 5) {
24301 GemmMicrokernelTester()
24302 .mr(3)
24303 .nr(8)
24304 .kr(1)
24305 .sr(1)
24306 .m(3)
24307 .n(8)
24308 .k(k)
24309 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024310 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024311 }
24312 }
24313
Frank Barchard0725b8d2020-12-07 11:07:35 -080024314 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, small_kernel_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024315 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024316 for (uint32_t n = 1; n <= 8; n++) {
24317 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024318 GemmMicrokernelTester()
24319 .mr(3)
24320 .nr(8)
24321 .kr(1)
24322 .sr(1)
24323 .m(m)
24324 .n(n)
24325 .k(k)
24326 .ks(3)
24327 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024328 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024329 }
24330 }
24331 }
24332 }
24333
Frank Barchard0725b8d2020-12-07 11:07:35 -080024334 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_gt_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024335 for (uint32_t n = 9; n < 16; n++) {
24336 for (size_t k = 1; k <= 20; k += 5) {
24337 GemmMicrokernelTester()
24338 .mr(3)
24339 .nr(8)
24340 .kr(1)
24341 .sr(1)
24342 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024343 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024344 .k(k)
24345 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024346 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024347 }
24348 }
24349 }
24350
Frank Barchard0725b8d2020-12-07 11:07:35 -080024351 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, n_div_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024352 for (uint32_t n = 16; n <= 24; n += 8) {
24353 for (size_t k = 1; k <= 20; k += 5) {
24354 GemmMicrokernelTester()
24355 .mr(3)
24356 .nr(8)
24357 .kr(1)
24358 .sr(1)
24359 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024360 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024361 .k(k)
24362 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024363 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024364 }
24365 }
24366 }
24367
Frank Barchard0725b8d2020-12-07 11:07:35 -080024368 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024369 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024370 for (uint32_t n = 1; n <= 8; n++) {
24371 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024372 GemmMicrokernelTester()
24373 .mr(3)
24374 .nr(8)
24375 .kr(1)
24376 .sr(1)
24377 .m(m)
24378 .n(n)
24379 .k(k)
24380 .cm_stride(11)
24381 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024382 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024383 }
24384 }
24385 }
24386 }
24387
Frank Barchard0725b8d2020-12-07 11:07:35 -080024388 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, a_offset) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024389 for (size_t k = 1; k <= 20; k += 5) {
24390 GemmMicrokernelTester()
24391 .mr(3)
24392 .nr(8)
24393 .kr(1)
24394 .sr(1)
24395 .m(3)
24396 .n(8)
24397 .k(k)
24398 .ks(3)
24399 .a_offset(67)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024400 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024401 }
24402 }
24403
Frank Barchard0725b8d2020-12-07 11:07:35 -080024404 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024405 for (size_t k = 1; k <= 20; k += 5) {
24406 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024407 GemmMicrokernelTester()
24408 .mr(3)
24409 .nr(8)
24410 .kr(1)
24411 .sr(1)
24412 .m(3)
24413 .n(8)
24414 .k(k)
24415 .ks(3)
24416 .a_offset(67)
24417 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024418 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024419 }
24420 }
24421 }
24422
Frank Barchard0725b8d2020-12-07 11:07:35 -080024423 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024424 GemmMicrokernelTester()
24425 .mr(3)
24426 .nr(8)
24427 .kr(1)
24428 .sr(1)
24429 .m(3)
24430 .n(8)
24431 .k(4)
24432 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024433 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024434 }
24435
Frank Barchard0725b8d2020-12-07 11:07:35 -080024436 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024437 GemmMicrokernelTester()
24438 .mr(3)
24439 .nr(8)
24440 .kr(1)
24441 .sr(1)
24442 .m(3)
24443 .n(8)
24444 .k(4)
24445 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024446 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024447 }
24448
Frank Barchard0725b8d2020-12-07 11:07:35 -080024449 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_ARM_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024450 GemmMicrokernelTester()
24451 .mr(3)
24452 .nr(8)
24453 .kr(1)
24454 .sr(1)
24455 .m(3)
24456 .n(8)
24457 .k(4)
24458 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024459 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024460 }
Marat Dukhan4c617792021-12-21 15:47:58 -080024461#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024462
24463
Marat Dukhan4c617792021-12-21 15:47:58 -080024464#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080024465 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024466 GemmMicrokernelTester()
24467 .mr(4)
24468 .nr(8)
24469 .kr(1)
24470 .sr(1)
24471 .m(4)
24472 .n(8)
24473 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024474 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024475 }
24476
Frank Barchard0725b8d2020-12-07 11:07:35 -080024477 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024478 GemmMicrokernelTester()
24479 .mr(4)
24480 .nr(8)
24481 .kr(1)
24482 .sr(1)
24483 .m(4)
24484 .n(8)
24485 .k(4)
24486 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024487 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024488 }
24489
Frank Barchard0725b8d2020-12-07 11:07:35 -080024490 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024491 for (uint32_t n = 1; n <= 8; n++) {
24492 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024493 GemmMicrokernelTester()
24494 .mr(4)
24495 .nr(8)
24496 .kr(1)
24497 .sr(1)
24498 .m(m)
24499 .n(n)
24500 .k(4)
24501 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024502 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024503 }
24504 }
24505 }
24506
Frank Barchard0725b8d2020-12-07 11:07:35 -080024507 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024508 for (uint32_t m = 1; m <= 4; m++) {
24509 GemmMicrokernelTester()
24510 .mr(4)
24511 .nr(8)
24512 .kr(1)
24513 .sr(1)
24514 .m(m)
24515 .n(8)
24516 .k(4)
24517 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024518 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024519 }
24520 }
24521
Frank Barchard0725b8d2020-12-07 11:07:35 -080024522 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024523 for (uint32_t n = 1; n <= 8; n++) {
24524 GemmMicrokernelTester()
24525 .mr(4)
24526 .nr(8)
24527 .kr(1)
24528 .sr(1)
24529 .m(4)
24530 .n(n)
24531 .k(4)
24532 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024533 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024534 }
24535 }
24536
Frank Barchard0725b8d2020-12-07 11:07:35 -080024537 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024538 for (size_t k = 1; k < 4; k++) {
24539 GemmMicrokernelTester()
24540 .mr(4)
24541 .nr(8)
24542 .kr(1)
24543 .sr(1)
24544 .m(4)
24545 .n(8)
24546 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024547 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024548 }
24549 }
24550
Frank Barchard0725b8d2020-12-07 11:07:35 -080024551 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024552 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024553 for (uint32_t n = 1; n <= 8; n++) {
24554 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024555 GemmMicrokernelTester()
24556 .mr(4)
24557 .nr(8)
24558 .kr(1)
24559 .sr(1)
24560 .m(m)
24561 .n(n)
24562 .k(k)
24563 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024564 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024565 }
24566 }
24567 }
24568 }
24569
Frank Barchard0725b8d2020-12-07 11:07:35 -080024570 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024571 for (size_t k = 5; k < 8; k++) {
24572 GemmMicrokernelTester()
24573 .mr(4)
24574 .nr(8)
24575 .kr(1)
24576 .sr(1)
24577 .m(4)
24578 .n(8)
24579 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024580 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024581 }
24582 }
24583
Frank Barchard0725b8d2020-12-07 11:07:35 -080024584 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024585 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024586 for (uint32_t n = 1; n <= 8; n++) {
24587 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024588 GemmMicrokernelTester()
24589 .mr(4)
24590 .nr(8)
24591 .kr(1)
24592 .sr(1)
24593 .m(m)
24594 .n(n)
24595 .k(k)
24596 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024597 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024598 }
24599 }
24600 }
24601 }
24602
Frank Barchard0725b8d2020-12-07 11:07:35 -080024603 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024604 for (size_t k = 8; k <= 40; k += 4) {
24605 GemmMicrokernelTester()
24606 .mr(4)
24607 .nr(8)
24608 .kr(1)
24609 .sr(1)
24610 .m(4)
24611 .n(8)
24612 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024613 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024614 }
24615 }
24616
Frank Barchard0725b8d2020-12-07 11:07:35 -080024617 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024618 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024619 for (uint32_t n = 1; n <= 8; n++) {
24620 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024621 GemmMicrokernelTester()
24622 .mr(4)
24623 .nr(8)
24624 .kr(1)
24625 .sr(1)
24626 .m(m)
24627 .n(n)
24628 .k(k)
24629 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024630 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024631 }
24632 }
24633 }
24634 }
24635
Frank Barchard0725b8d2020-12-07 11:07:35 -080024636 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024637 for (uint32_t n = 9; n < 16; n++) {
24638 for (size_t k = 1; k <= 20; k += 5) {
24639 GemmMicrokernelTester()
24640 .mr(4)
24641 .nr(8)
24642 .kr(1)
24643 .sr(1)
24644 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024645 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024646 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024647 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024648 }
24649 }
24650 }
24651
Frank Barchard0725b8d2020-12-07 11:07:35 -080024652 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024653 for (uint32_t n = 9; n < 16; n++) {
24654 for (size_t k = 1; k <= 20; k += 5) {
24655 GemmMicrokernelTester()
24656 .mr(4)
24657 .nr(8)
24658 .kr(1)
24659 .sr(1)
24660 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024661 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024662 .k(k)
24663 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024664 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024665 }
24666 }
24667 }
24668
Frank Barchard0725b8d2020-12-07 11:07:35 -080024669 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024670 for (uint32_t n = 9; n < 16; n++) {
24671 for (size_t k = 1; k <= 20; k += 5) {
24672 for (uint32_t m = 1; m <= 4; m++) {
24673 GemmMicrokernelTester()
24674 .mr(4)
24675 .nr(8)
24676 .kr(1)
24677 .sr(1)
24678 .m(m)
24679 .n(n)
24680 .k(k)
24681 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024682 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024683 }
24684 }
24685 }
24686 }
24687
Frank Barchard0725b8d2020-12-07 11:07:35 -080024688 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024689 for (uint32_t n = 16; n <= 24; n += 8) {
24690 for (size_t k = 1; k <= 20; k += 5) {
24691 GemmMicrokernelTester()
24692 .mr(4)
24693 .nr(8)
24694 .kr(1)
24695 .sr(1)
24696 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024697 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024698 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024699 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024700 }
24701 }
24702 }
24703
Frank Barchard0725b8d2020-12-07 11:07:35 -080024704 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024705 for (uint32_t n = 16; n <= 24; n += 8) {
24706 for (size_t k = 1; k <= 20; k += 5) {
24707 GemmMicrokernelTester()
24708 .mr(4)
24709 .nr(8)
24710 .kr(1)
24711 .sr(1)
24712 .m(4)
24713 .n(n)
24714 .k(k)
24715 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024716 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024717 }
24718 }
24719 }
24720
Frank Barchard0725b8d2020-12-07 11:07:35 -080024721 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024722 for (uint32_t n = 16; n <= 24; n += 8) {
24723 for (size_t k = 1; k <= 20; k += 5) {
24724 for (uint32_t m = 1; m <= 4; m++) {
24725 GemmMicrokernelTester()
24726 .mr(4)
24727 .nr(8)
24728 .kr(1)
24729 .sr(1)
24730 .m(m)
24731 .n(n)
24732 .k(k)
24733 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024734 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024735 }
24736 }
24737 }
24738 }
24739
Frank Barchard0725b8d2020-12-07 11:07:35 -080024740 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024741 for (size_t k = 1; k <= 20; k += 5) {
24742 GemmMicrokernelTester()
24743 .mr(4)
24744 .nr(8)
24745 .kr(1)
24746 .sr(1)
24747 .m(4)
24748 .n(8)
24749 .k(k)
24750 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024751 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024752 }
24753 }
24754
Frank Barchard0725b8d2020-12-07 11:07:35 -080024755 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, small_kernel_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024756 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024757 for (uint32_t n = 1; n <= 8; n++) {
24758 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024759 GemmMicrokernelTester()
24760 .mr(4)
24761 .nr(8)
24762 .kr(1)
24763 .sr(1)
24764 .m(m)
24765 .n(n)
24766 .k(k)
24767 .ks(3)
24768 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024769 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024770 }
24771 }
24772 }
24773 }
24774
Frank Barchard0725b8d2020-12-07 11:07:35 -080024775 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024776 for (uint32_t n = 9; n < 16; n++) {
24777 for (size_t k = 1; k <= 20; k += 5) {
24778 GemmMicrokernelTester()
24779 .mr(4)
24780 .nr(8)
24781 .kr(1)
24782 .sr(1)
24783 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024784 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024785 .k(k)
24786 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024787 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024788 }
24789 }
24790 }
24791
Frank Barchard0725b8d2020-12-07 11:07:35 -080024792 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024793 for (uint32_t n = 16; n <= 24; n += 8) {
24794 for (size_t k = 1; k <= 20; k += 5) {
24795 GemmMicrokernelTester()
24796 .mr(4)
24797 .nr(8)
24798 .kr(1)
24799 .sr(1)
24800 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024801 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024802 .k(k)
24803 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024804 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024805 }
24806 }
24807 }
24808
Frank Barchard0725b8d2020-12-07 11:07:35 -080024809 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024810 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024811 for (uint32_t n = 1; n <= 8; n++) {
24812 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024813 GemmMicrokernelTester()
24814 .mr(4)
24815 .nr(8)
24816 .kr(1)
24817 .sr(1)
24818 .m(m)
24819 .n(n)
24820 .k(k)
24821 .cm_stride(11)
24822 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024823 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024824 }
24825 }
24826 }
24827 }
24828
Frank Barchard0725b8d2020-12-07 11:07:35 -080024829 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, a_offset) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024830 for (size_t k = 1; k <= 20; k += 5) {
24831 GemmMicrokernelTester()
24832 .mr(4)
24833 .nr(8)
24834 .kr(1)
24835 .sr(1)
24836 .m(4)
24837 .n(8)
24838 .k(k)
24839 .ks(3)
24840 .a_offset(83)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024841 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024842 }
24843 }
24844
Frank Barchard0725b8d2020-12-07 11:07:35 -080024845 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024846 for (size_t k = 1; k <= 20; k += 5) {
24847 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024848 GemmMicrokernelTester()
24849 .mr(4)
24850 .nr(8)
24851 .kr(1)
24852 .sr(1)
24853 .m(4)
24854 .n(8)
24855 .k(k)
24856 .ks(3)
24857 .a_offset(83)
24858 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024859 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024860 }
24861 }
24862 }
24863
Frank Barchard0725b8d2020-12-07 11:07:35 -080024864 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024865 GemmMicrokernelTester()
24866 .mr(4)
24867 .nr(8)
24868 .kr(1)
24869 .sr(1)
24870 .m(4)
24871 .n(8)
24872 .k(4)
24873 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024874 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024875 }
24876
Frank Barchard0725b8d2020-12-07 11:07:35 -080024877 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024878 GemmMicrokernelTester()
24879 .mr(4)
24880 .nr(8)
24881 .kr(1)
24882 .sr(1)
24883 .m(4)
24884 .n(8)
24885 .k(4)
24886 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024887 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024888 }
24889
Frank Barchard0725b8d2020-12-07 11:07:35 -080024890 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024891 GemmMicrokernelTester()
24892 .mr(4)
24893 .nr(8)
24894 .kr(1)
24895 .sr(1)
24896 .m(4)
24897 .n(8)
24898 .k(4)
24899 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024900 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024901 }
Marat Dukhan4c617792021-12-21 15:47:58 -080024902#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024903
24904
Marat Dukhan4c617792021-12-21 15:47:58 -080024905#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080024906 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024907 GemmMicrokernelTester()
24908 .mr(5)
24909 .nr(8)
24910 .kr(1)
24911 .sr(1)
24912 .m(5)
24913 .n(8)
24914 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024915 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024916 }
24917
Frank Barchard0725b8d2020-12-07 11:07:35 -080024918 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024919 GemmMicrokernelTester()
24920 .mr(5)
24921 .nr(8)
24922 .kr(1)
24923 .sr(1)
24924 .m(5)
24925 .n(8)
24926 .k(4)
24927 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024928 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024929 }
24930
Frank Barchard0725b8d2020-12-07 11:07:35 -080024931 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024932 for (uint32_t n = 1; n <= 8; n++) {
24933 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024934 GemmMicrokernelTester()
24935 .mr(5)
24936 .nr(8)
24937 .kr(1)
24938 .sr(1)
24939 .m(m)
24940 .n(n)
24941 .k(4)
24942 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024943 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024944 }
24945 }
24946 }
24947
Frank Barchard0725b8d2020-12-07 11:07:35 -080024948 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024949 for (uint32_t m = 1; m <= 5; m++) {
24950 GemmMicrokernelTester()
24951 .mr(5)
24952 .nr(8)
24953 .kr(1)
24954 .sr(1)
24955 .m(m)
24956 .n(8)
24957 .k(4)
24958 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024959 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024960 }
24961 }
24962
Frank Barchard0725b8d2020-12-07 11:07:35 -080024963 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024964 for (uint32_t n = 1; n <= 8; n++) {
24965 GemmMicrokernelTester()
24966 .mr(5)
24967 .nr(8)
24968 .kr(1)
24969 .sr(1)
24970 .m(5)
24971 .n(n)
24972 .k(4)
24973 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024974 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024975 }
24976 }
24977
Frank Barchard0725b8d2020-12-07 11:07:35 -080024978 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024979 for (size_t k = 1; k < 4; k++) {
24980 GemmMicrokernelTester()
24981 .mr(5)
24982 .nr(8)
24983 .kr(1)
24984 .sr(1)
24985 .m(5)
24986 .n(8)
24987 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080024988 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024989 }
24990 }
24991
Frank Barchard0725b8d2020-12-07 11:07:35 -080024992 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024993 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024994 for (uint32_t n = 1; n <= 8; n++) {
24995 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070024996 GemmMicrokernelTester()
24997 .mr(5)
24998 .nr(8)
24999 .kr(1)
25000 .sr(1)
25001 .m(m)
25002 .n(n)
25003 .k(k)
25004 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025005 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025006 }
25007 }
25008 }
25009 }
25010
Frank Barchard0725b8d2020-12-07 11:07:35 -080025011 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025012 for (size_t k = 5; k < 8; k++) {
25013 GemmMicrokernelTester()
25014 .mr(5)
25015 .nr(8)
25016 .kr(1)
25017 .sr(1)
25018 .m(5)
25019 .n(8)
25020 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025021 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025022 }
25023 }
25024
Frank Barchard0725b8d2020-12-07 11:07:35 -080025025 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025026 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025027 for (uint32_t n = 1; n <= 8; n++) {
25028 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025029 GemmMicrokernelTester()
25030 .mr(5)
25031 .nr(8)
25032 .kr(1)
25033 .sr(1)
25034 .m(m)
25035 .n(n)
25036 .k(k)
25037 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025038 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025039 }
25040 }
25041 }
25042 }
25043
Frank Barchard0725b8d2020-12-07 11:07:35 -080025044 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025045 for (size_t k = 8; k <= 40; k += 4) {
25046 GemmMicrokernelTester()
25047 .mr(5)
25048 .nr(8)
25049 .kr(1)
25050 .sr(1)
25051 .m(5)
25052 .n(8)
25053 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025054 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025055 }
25056 }
25057
Frank Barchard0725b8d2020-12-07 11:07:35 -080025058 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025059 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025060 for (uint32_t n = 1; n <= 8; n++) {
25061 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025062 GemmMicrokernelTester()
25063 .mr(5)
25064 .nr(8)
25065 .kr(1)
25066 .sr(1)
25067 .m(m)
25068 .n(n)
25069 .k(k)
25070 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025071 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025072 }
25073 }
25074 }
25075 }
25076
Frank Barchard0725b8d2020-12-07 11:07:35 -080025077 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025078 for (uint32_t n = 9; n < 16; n++) {
25079 for (size_t k = 1; k <= 20; k += 5) {
25080 GemmMicrokernelTester()
25081 .mr(5)
25082 .nr(8)
25083 .kr(1)
25084 .sr(1)
25085 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025086 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025087 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025088 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025089 }
25090 }
25091 }
25092
Frank Barchard0725b8d2020-12-07 11:07:35 -080025093 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025094 for (uint32_t n = 9; n < 16; n++) {
25095 for (size_t k = 1; k <= 20; k += 5) {
25096 GemmMicrokernelTester()
25097 .mr(5)
25098 .nr(8)
25099 .kr(1)
25100 .sr(1)
25101 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025102 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025103 .k(k)
25104 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025105 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025106 }
25107 }
25108 }
25109
Frank Barchard0725b8d2020-12-07 11:07:35 -080025110 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025111 for (uint32_t n = 9; n < 16; n++) {
25112 for (size_t k = 1; k <= 20; k += 5) {
25113 for (uint32_t m = 1; m <= 5; m++) {
25114 GemmMicrokernelTester()
25115 .mr(5)
25116 .nr(8)
25117 .kr(1)
25118 .sr(1)
25119 .m(m)
25120 .n(n)
25121 .k(k)
25122 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025123 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025124 }
25125 }
25126 }
25127 }
25128
Frank Barchard0725b8d2020-12-07 11:07:35 -080025129 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025130 for (uint32_t n = 16; n <= 24; n += 8) {
25131 for (size_t k = 1; k <= 20; k += 5) {
25132 GemmMicrokernelTester()
25133 .mr(5)
25134 .nr(8)
25135 .kr(1)
25136 .sr(1)
25137 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025138 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025139 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025140 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025141 }
25142 }
25143 }
25144
Frank Barchard0725b8d2020-12-07 11:07:35 -080025145 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025146 for (uint32_t n = 16; n <= 24; n += 8) {
25147 for (size_t k = 1; k <= 20; k += 5) {
25148 GemmMicrokernelTester()
25149 .mr(5)
25150 .nr(8)
25151 .kr(1)
25152 .sr(1)
25153 .m(5)
25154 .n(n)
25155 .k(k)
25156 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025157 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025158 }
25159 }
25160 }
25161
Frank Barchard0725b8d2020-12-07 11:07:35 -080025162 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025163 for (uint32_t n = 16; n <= 24; n += 8) {
25164 for (size_t k = 1; k <= 20; k += 5) {
25165 for (uint32_t m = 1; m <= 5; m++) {
25166 GemmMicrokernelTester()
25167 .mr(5)
25168 .nr(8)
25169 .kr(1)
25170 .sr(1)
25171 .m(m)
25172 .n(n)
25173 .k(k)
25174 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025175 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025176 }
25177 }
25178 }
25179 }
25180
Frank Barchard0725b8d2020-12-07 11:07:35 -080025181 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025182 for (size_t k = 1; k <= 20; k += 5) {
25183 GemmMicrokernelTester()
25184 .mr(5)
25185 .nr(8)
25186 .kr(1)
25187 .sr(1)
25188 .m(5)
25189 .n(8)
25190 .k(k)
25191 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025192 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025193 }
25194 }
25195
Frank Barchard0725b8d2020-12-07 11:07:35 -080025196 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, small_kernel_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025197 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025198 for (uint32_t n = 1; n <= 8; n++) {
25199 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025200 GemmMicrokernelTester()
25201 .mr(5)
25202 .nr(8)
25203 .kr(1)
25204 .sr(1)
25205 .m(m)
25206 .n(n)
25207 .k(k)
25208 .ks(3)
25209 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025210 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025211 }
25212 }
25213 }
25214 }
25215
Frank Barchard0725b8d2020-12-07 11:07:35 -080025216 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_gt_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025217 for (uint32_t n = 9; n < 16; n++) {
25218 for (size_t k = 1; k <= 20; k += 5) {
25219 GemmMicrokernelTester()
25220 .mr(5)
25221 .nr(8)
25222 .kr(1)
25223 .sr(1)
25224 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025225 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025226 .k(k)
25227 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025228 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025229 }
25230 }
25231 }
25232
Frank Barchard0725b8d2020-12-07 11:07:35 -080025233 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, n_div_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025234 for (uint32_t n = 16; n <= 24; n += 8) {
25235 for (size_t k = 1; k <= 20; k += 5) {
25236 GemmMicrokernelTester()
25237 .mr(5)
25238 .nr(8)
25239 .kr(1)
25240 .sr(1)
25241 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025242 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025243 .k(k)
25244 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025245 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025246 }
25247 }
25248 }
25249
Frank Barchard0725b8d2020-12-07 11:07:35 -080025250 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025251 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025252 for (uint32_t n = 1; n <= 8; n++) {
25253 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025254 GemmMicrokernelTester()
25255 .mr(5)
25256 .nr(8)
25257 .kr(1)
25258 .sr(1)
25259 .m(m)
25260 .n(n)
25261 .k(k)
25262 .cm_stride(11)
25263 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025264 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025265 }
25266 }
25267 }
25268 }
25269
Frank Barchard0725b8d2020-12-07 11:07:35 -080025270 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, a_offset) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025271 for (size_t k = 1; k <= 20; k += 5) {
25272 GemmMicrokernelTester()
25273 .mr(5)
25274 .nr(8)
25275 .kr(1)
25276 .sr(1)
25277 .m(5)
25278 .n(8)
25279 .k(k)
25280 .ks(3)
25281 .a_offset(103)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025282 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025283 }
25284 }
25285
Frank Barchard0725b8d2020-12-07 11:07:35 -080025286 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025287 for (size_t k = 1; k <= 20; k += 5) {
25288 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025289 GemmMicrokernelTester()
25290 .mr(5)
25291 .nr(8)
25292 .kr(1)
25293 .sr(1)
25294 .m(5)
25295 .n(8)
25296 .k(k)
25297 .ks(3)
25298 .a_offset(103)
25299 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025300 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025301 }
25302 }
25303 }
25304
Frank Barchard0725b8d2020-12-07 11:07:35 -080025305 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025306 GemmMicrokernelTester()
25307 .mr(5)
25308 .nr(8)
25309 .kr(1)
25310 .sr(1)
25311 .m(5)
25312 .n(8)
25313 .k(4)
25314 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025315 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025316 }
25317
Frank Barchard0725b8d2020-12-07 11:07:35 -080025318 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025319 GemmMicrokernelTester()
25320 .mr(5)
25321 .nr(8)
25322 .kr(1)
25323 .sr(1)
25324 .m(5)
25325 .n(8)
25326 .k(4)
25327 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025328 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025329 }
25330
Frank Barchard0725b8d2020-12-07 11:07:35 -080025331 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025332 GemmMicrokernelTester()
25333 .mr(5)
25334 .nr(8)
25335 .kr(1)
25336 .sr(1)
25337 .m(5)
25338 .n(8)
25339 .k(4)
25340 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025341 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025342 }
Marat Dukhan4c617792021-12-21 15:47:58 -080025343#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025344
25345
Marat Dukhan4c617792021-12-21 15:47:58 -080025346#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080025347 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025348 GemmMicrokernelTester()
25349 .mr(3)
25350 .nr(8)
25351 .kr(1)
25352 .sr(1)
25353 .m(3)
25354 .n(8)
25355 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025356 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025357 }
25358
Frank Barchard0725b8d2020-12-07 11:07:35 -080025359 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025360 GemmMicrokernelTester()
25361 .mr(3)
25362 .nr(8)
25363 .kr(1)
25364 .sr(1)
25365 .m(3)
25366 .n(8)
25367 .k(4)
25368 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025369 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025370 }
25371
Frank Barchard0725b8d2020-12-07 11:07:35 -080025372 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025373 for (uint32_t n = 1; n <= 8; n++) {
25374 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025375 GemmMicrokernelTester()
25376 .mr(3)
25377 .nr(8)
25378 .kr(1)
25379 .sr(1)
25380 .m(m)
25381 .n(n)
25382 .k(4)
25383 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025384 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025385 }
25386 }
25387 }
25388
Frank Barchard0725b8d2020-12-07 11:07:35 -080025389 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025390 for (uint32_t m = 1; m <= 3; m++) {
25391 GemmMicrokernelTester()
25392 .mr(3)
25393 .nr(8)
25394 .kr(1)
25395 .sr(1)
25396 .m(m)
25397 .n(8)
25398 .k(4)
25399 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025400 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025401 }
25402 }
25403
Frank Barchard0725b8d2020-12-07 11:07:35 -080025404 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025405 for (uint32_t n = 1; n <= 8; n++) {
25406 GemmMicrokernelTester()
25407 .mr(3)
25408 .nr(8)
25409 .kr(1)
25410 .sr(1)
25411 .m(3)
25412 .n(n)
25413 .k(4)
25414 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025415 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025416 }
25417 }
25418
Frank Barchard0725b8d2020-12-07 11:07:35 -080025419 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025420 for (size_t k = 1; k < 4; k++) {
25421 GemmMicrokernelTester()
25422 .mr(3)
25423 .nr(8)
25424 .kr(1)
25425 .sr(1)
25426 .m(3)
25427 .n(8)
25428 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025429 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025430 }
25431 }
25432
Frank Barchard0725b8d2020-12-07 11:07:35 -080025433 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025434 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025435 for (uint32_t n = 1; n <= 8; n++) {
25436 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025437 GemmMicrokernelTester()
25438 .mr(3)
25439 .nr(8)
25440 .kr(1)
25441 .sr(1)
25442 .m(m)
25443 .n(n)
25444 .k(k)
25445 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025446 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025447 }
25448 }
25449 }
25450 }
25451
Frank Barchard0725b8d2020-12-07 11:07:35 -080025452 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025453 for (size_t k = 5; k < 8; k++) {
25454 GemmMicrokernelTester()
25455 .mr(3)
25456 .nr(8)
25457 .kr(1)
25458 .sr(1)
25459 .m(3)
25460 .n(8)
25461 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025462 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025463 }
25464 }
25465
Frank Barchard0725b8d2020-12-07 11:07:35 -080025466 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025467 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025468 for (uint32_t n = 1; n <= 8; n++) {
25469 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025470 GemmMicrokernelTester()
25471 .mr(3)
25472 .nr(8)
25473 .kr(1)
25474 .sr(1)
25475 .m(m)
25476 .n(n)
25477 .k(k)
25478 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025479 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025480 }
25481 }
25482 }
25483 }
25484
Frank Barchard0725b8d2020-12-07 11:07:35 -080025485 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025486 for (size_t k = 8; k <= 40; k += 4) {
25487 GemmMicrokernelTester()
25488 .mr(3)
25489 .nr(8)
25490 .kr(1)
25491 .sr(1)
25492 .m(3)
25493 .n(8)
25494 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025495 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025496 }
25497 }
25498
Frank Barchard0725b8d2020-12-07 11:07:35 -080025499 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025500 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025501 for (uint32_t n = 1; n <= 8; n++) {
25502 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025503 GemmMicrokernelTester()
25504 .mr(3)
25505 .nr(8)
25506 .kr(1)
25507 .sr(1)
25508 .m(m)
25509 .n(n)
25510 .k(k)
25511 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025512 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025513 }
25514 }
25515 }
25516 }
25517
Frank Barchard0725b8d2020-12-07 11:07:35 -080025518 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025519 for (uint32_t n = 9; n < 16; n++) {
25520 for (size_t k = 1; k <= 20; k += 5) {
25521 GemmMicrokernelTester()
25522 .mr(3)
25523 .nr(8)
25524 .kr(1)
25525 .sr(1)
25526 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025527 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025528 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025529 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025530 }
25531 }
25532 }
25533
Frank Barchard0725b8d2020-12-07 11:07:35 -080025534 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025535 for (uint32_t n = 9; n < 16; n++) {
25536 for (size_t k = 1; k <= 20; k += 5) {
25537 GemmMicrokernelTester()
25538 .mr(3)
25539 .nr(8)
25540 .kr(1)
25541 .sr(1)
25542 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025543 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025544 .k(k)
25545 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025546 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025547 }
25548 }
25549 }
25550
Frank Barchard0725b8d2020-12-07 11:07:35 -080025551 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025552 for (uint32_t n = 9; n < 16; n++) {
25553 for (size_t k = 1; k <= 20; k += 5) {
25554 for (uint32_t m = 1; m <= 3; m++) {
25555 GemmMicrokernelTester()
25556 .mr(3)
25557 .nr(8)
25558 .kr(1)
25559 .sr(1)
25560 .m(m)
25561 .n(n)
25562 .k(k)
25563 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025564 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025565 }
25566 }
25567 }
25568 }
25569
Frank Barchard0725b8d2020-12-07 11:07:35 -080025570 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025571 for (uint32_t n = 16; n <= 24; n += 8) {
25572 for (size_t k = 1; k <= 20; k += 5) {
25573 GemmMicrokernelTester()
25574 .mr(3)
25575 .nr(8)
25576 .kr(1)
25577 .sr(1)
25578 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025579 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025580 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025581 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025582 }
25583 }
25584 }
25585
Frank Barchard0725b8d2020-12-07 11:07:35 -080025586 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025587 for (uint32_t n = 16; n <= 24; n += 8) {
25588 for (size_t k = 1; k <= 20; k += 5) {
25589 GemmMicrokernelTester()
25590 .mr(3)
25591 .nr(8)
25592 .kr(1)
25593 .sr(1)
25594 .m(3)
25595 .n(n)
25596 .k(k)
25597 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025598 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025599 }
25600 }
25601 }
25602
Frank Barchard0725b8d2020-12-07 11:07:35 -080025603 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025604 for (uint32_t n = 16; n <= 24; n += 8) {
25605 for (size_t k = 1; k <= 20; k += 5) {
25606 for (uint32_t m = 1; m <= 3; m++) {
25607 GemmMicrokernelTester()
25608 .mr(3)
25609 .nr(8)
25610 .kr(1)
25611 .sr(1)
25612 .m(m)
25613 .n(n)
25614 .k(k)
25615 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025616 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025617 }
25618 }
25619 }
25620 }
25621
Frank Barchard0725b8d2020-12-07 11:07:35 -080025622 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025623 for (size_t k = 1; k <= 20; k += 5) {
25624 GemmMicrokernelTester()
25625 .mr(3)
25626 .nr(8)
25627 .kr(1)
25628 .sr(1)
25629 .m(3)
25630 .n(8)
25631 .k(k)
25632 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025633 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025634 }
25635 }
25636
Frank Barchard0725b8d2020-12-07 11:07:35 -080025637 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, small_kernel_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025638 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025639 for (uint32_t n = 1; n <= 8; n++) {
25640 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025641 GemmMicrokernelTester()
25642 .mr(3)
25643 .nr(8)
25644 .kr(1)
25645 .sr(1)
25646 .m(m)
25647 .n(n)
25648 .k(k)
25649 .ks(3)
25650 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025651 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025652 }
25653 }
25654 }
25655 }
25656
Frank Barchard0725b8d2020-12-07 11:07:35 -080025657 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_gt_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025658 for (uint32_t n = 9; n < 16; n++) {
25659 for (size_t k = 1; k <= 20; k += 5) {
25660 GemmMicrokernelTester()
25661 .mr(3)
25662 .nr(8)
25663 .kr(1)
25664 .sr(1)
25665 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025666 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025667 .k(k)
25668 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025669 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025670 }
25671 }
25672 }
25673
Frank Barchard0725b8d2020-12-07 11:07:35 -080025674 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, n_div_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025675 for (uint32_t n = 16; n <= 24; n += 8) {
25676 for (size_t k = 1; k <= 20; k += 5) {
25677 GemmMicrokernelTester()
25678 .mr(3)
25679 .nr(8)
25680 .kr(1)
25681 .sr(1)
25682 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025683 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025684 .k(k)
25685 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025686 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025687 }
25688 }
25689 }
25690
Frank Barchard0725b8d2020-12-07 11:07:35 -080025691 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025692 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025693 for (uint32_t n = 1; n <= 8; n++) {
25694 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025695 GemmMicrokernelTester()
25696 .mr(3)
25697 .nr(8)
25698 .kr(1)
25699 .sr(1)
25700 .m(m)
25701 .n(n)
25702 .k(k)
25703 .cm_stride(11)
25704 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025705 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025706 }
25707 }
25708 }
25709 }
25710
Frank Barchard0725b8d2020-12-07 11:07:35 -080025711 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, a_offset) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025712 for (size_t k = 1; k <= 20; k += 5) {
25713 GemmMicrokernelTester()
25714 .mr(3)
25715 .nr(8)
25716 .kr(1)
25717 .sr(1)
25718 .m(3)
25719 .n(8)
25720 .k(k)
25721 .ks(3)
25722 .a_offset(67)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025723 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025724 }
25725 }
25726
Frank Barchard0725b8d2020-12-07 11:07:35 -080025727 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025728 for (size_t k = 1; k <= 20; k += 5) {
25729 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025730 GemmMicrokernelTester()
25731 .mr(3)
25732 .nr(8)
25733 .kr(1)
25734 .sr(1)
25735 .m(3)
25736 .n(8)
25737 .k(k)
25738 .ks(3)
25739 .a_offset(67)
25740 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025741 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025742 }
25743 }
25744 }
25745
Frank Barchard0725b8d2020-12-07 11:07:35 -080025746 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025747 GemmMicrokernelTester()
25748 .mr(3)
25749 .nr(8)
25750 .kr(1)
25751 .sr(1)
25752 .m(3)
25753 .n(8)
25754 .k(4)
25755 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025756 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025757 }
25758
Frank Barchard0725b8d2020-12-07 11:07:35 -080025759 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025760 GemmMicrokernelTester()
25761 .mr(3)
25762 .nr(8)
25763 .kr(1)
25764 .sr(1)
25765 .m(3)
25766 .n(8)
25767 .k(4)
25768 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025769 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025770 }
25771
Frank Barchard0725b8d2020-12-07 11:07:35 -080025772 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025773 GemmMicrokernelTester()
25774 .mr(3)
25775 .nr(8)
25776 .kr(1)
25777 .sr(1)
25778 .m(3)
25779 .n(8)
25780 .k(4)
25781 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025782 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025783 }
Marat Dukhan4c617792021-12-21 15:47:58 -080025784#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025785
25786
Marat Dukhan4c617792021-12-21 15:47:58 -080025787#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080025788 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025789 GemmMicrokernelTester()
25790 .mr(4)
25791 .nr(8)
25792 .kr(1)
25793 .sr(1)
25794 .m(4)
25795 .n(8)
25796 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025797 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025798 }
25799
Frank Barchard0725b8d2020-12-07 11:07:35 -080025800 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025801 GemmMicrokernelTester()
25802 .mr(4)
25803 .nr(8)
25804 .kr(1)
25805 .sr(1)
25806 .m(4)
25807 .n(8)
25808 .k(4)
25809 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025810 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025811 }
25812
Frank Barchard0725b8d2020-12-07 11:07:35 -080025813 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025814 for (uint32_t n = 1; n <= 8; n++) {
25815 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025816 GemmMicrokernelTester()
25817 .mr(4)
25818 .nr(8)
25819 .kr(1)
25820 .sr(1)
25821 .m(m)
25822 .n(n)
25823 .k(4)
25824 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025825 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025826 }
25827 }
25828 }
25829
Frank Barchard0725b8d2020-12-07 11:07:35 -080025830 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025831 for (uint32_t m = 1; m <= 4; m++) {
25832 GemmMicrokernelTester()
25833 .mr(4)
25834 .nr(8)
25835 .kr(1)
25836 .sr(1)
25837 .m(m)
25838 .n(8)
25839 .k(4)
25840 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025841 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025842 }
25843 }
25844
Frank Barchard0725b8d2020-12-07 11:07:35 -080025845 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025846 for (uint32_t n = 1; n <= 8; n++) {
25847 GemmMicrokernelTester()
25848 .mr(4)
25849 .nr(8)
25850 .kr(1)
25851 .sr(1)
25852 .m(4)
25853 .n(n)
25854 .k(4)
25855 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025856 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025857 }
25858 }
25859
Frank Barchard0725b8d2020-12-07 11:07:35 -080025860 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025861 for (size_t k = 1; k < 4; k++) {
25862 GemmMicrokernelTester()
25863 .mr(4)
25864 .nr(8)
25865 .kr(1)
25866 .sr(1)
25867 .m(4)
25868 .n(8)
25869 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025870 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025871 }
25872 }
25873
Frank Barchard0725b8d2020-12-07 11:07:35 -080025874 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025875 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025876 for (uint32_t n = 1; n <= 8; n++) {
25877 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025878 GemmMicrokernelTester()
25879 .mr(4)
25880 .nr(8)
25881 .kr(1)
25882 .sr(1)
25883 .m(m)
25884 .n(n)
25885 .k(k)
25886 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025887 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025888 }
25889 }
25890 }
25891 }
25892
Frank Barchard0725b8d2020-12-07 11:07:35 -080025893 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025894 for (size_t k = 5; k < 8; k++) {
25895 GemmMicrokernelTester()
25896 .mr(4)
25897 .nr(8)
25898 .kr(1)
25899 .sr(1)
25900 .m(4)
25901 .n(8)
25902 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025903 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025904 }
25905 }
25906
Frank Barchard0725b8d2020-12-07 11:07:35 -080025907 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025908 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025909 for (uint32_t n = 1; n <= 8; n++) {
25910 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025911 GemmMicrokernelTester()
25912 .mr(4)
25913 .nr(8)
25914 .kr(1)
25915 .sr(1)
25916 .m(m)
25917 .n(n)
25918 .k(k)
25919 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025920 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025921 }
25922 }
25923 }
25924 }
25925
Frank Barchard0725b8d2020-12-07 11:07:35 -080025926 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025927 for (size_t k = 8; k <= 40; k += 4) {
25928 GemmMicrokernelTester()
25929 .mr(4)
25930 .nr(8)
25931 .kr(1)
25932 .sr(1)
25933 .m(4)
25934 .n(8)
25935 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025936 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025937 }
25938 }
25939
Frank Barchard0725b8d2020-12-07 11:07:35 -080025940 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025941 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025942 for (uint32_t n = 1; n <= 8; n++) {
25943 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025944 GemmMicrokernelTester()
25945 .mr(4)
25946 .nr(8)
25947 .kr(1)
25948 .sr(1)
25949 .m(m)
25950 .n(n)
25951 .k(k)
25952 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025953 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025954 }
25955 }
25956 }
25957 }
25958
Frank Barchard0725b8d2020-12-07 11:07:35 -080025959 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025960 for (uint32_t n = 9; n < 16; n++) {
25961 for (size_t k = 1; k <= 20; k += 5) {
25962 GemmMicrokernelTester()
25963 .mr(4)
25964 .nr(8)
25965 .kr(1)
25966 .sr(1)
25967 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025968 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025969 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025970 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025971 }
25972 }
25973 }
25974
Frank Barchard0725b8d2020-12-07 11:07:35 -080025975 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025976 for (uint32_t n = 9; n < 16; n++) {
25977 for (size_t k = 1; k <= 20; k += 5) {
25978 GemmMicrokernelTester()
25979 .mr(4)
25980 .nr(8)
25981 .kr(1)
25982 .sr(1)
25983 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025984 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025985 .k(k)
25986 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080025987 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025988 }
25989 }
25990 }
25991
Frank Barchard0725b8d2020-12-07 11:07:35 -080025992 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070025993 for (uint32_t n = 9; n < 16; n++) {
25994 for (size_t k = 1; k <= 20; k += 5) {
25995 for (uint32_t m = 1; m <= 4; m++) {
25996 GemmMicrokernelTester()
25997 .mr(4)
25998 .nr(8)
25999 .kr(1)
26000 .sr(1)
26001 .m(m)
26002 .n(n)
26003 .k(k)
26004 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026005 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026006 }
26007 }
26008 }
26009 }
26010
Frank Barchard0725b8d2020-12-07 11:07:35 -080026011 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026012 for (uint32_t n = 16; n <= 24; n += 8) {
26013 for (size_t k = 1; k <= 20; k += 5) {
26014 GemmMicrokernelTester()
26015 .mr(4)
26016 .nr(8)
26017 .kr(1)
26018 .sr(1)
26019 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026020 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026021 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026022 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026023 }
26024 }
26025 }
26026
Frank Barchard0725b8d2020-12-07 11:07:35 -080026027 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026028 for (uint32_t n = 16; n <= 24; n += 8) {
26029 for (size_t k = 1; k <= 20; k += 5) {
26030 GemmMicrokernelTester()
26031 .mr(4)
26032 .nr(8)
26033 .kr(1)
26034 .sr(1)
26035 .m(4)
26036 .n(n)
26037 .k(k)
26038 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026039 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026040 }
26041 }
26042 }
26043
Frank Barchard0725b8d2020-12-07 11:07:35 -080026044 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026045 for (uint32_t n = 16; n <= 24; n += 8) {
26046 for (size_t k = 1; k <= 20; k += 5) {
26047 for (uint32_t m = 1; m <= 4; m++) {
26048 GemmMicrokernelTester()
26049 .mr(4)
26050 .nr(8)
26051 .kr(1)
26052 .sr(1)
26053 .m(m)
26054 .n(n)
26055 .k(k)
26056 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026057 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026058 }
26059 }
26060 }
26061 }
26062
Frank Barchard0725b8d2020-12-07 11:07:35 -080026063 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026064 for (size_t k = 1; k <= 20; k += 5) {
26065 GemmMicrokernelTester()
26066 .mr(4)
26067 .nr(8)
26068 .kr(1)
26069 .sr(1)
26070 .m(4)
26071 .n(8)
26072 .k(k)
26073 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026074 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026075 }
26076 }
26077
Frank Barchard0725b8d2020-12-07 11:07:35 -080026078 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, small_kernel_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026079 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026080 for (uint32_t n = 1; n <= 8; n++) {
26081 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026082 GemmMicrokernelTester()
26083 .mr(4)
26084 .nr(8)
26085 .kr(1)
26086 .sr(1)
26087 .m(m)
26088 .n(n)
26089 .k(k)
26090 .ks(3)
26091 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026092 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026093 }
26094 }
26095 }
26096 }
26097
Frank Barchard0725b8d2020-12-07 11:07:35 -080026098 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026099 for (uint32_t n = 9; n < 16; n++) {
26100 for (size_t k = 1; k <= 20; k += 5) {
26101 GemmMicrokernelTester()
26102 .mr(4)
26103 .nr(8)
26104 .kr(1)
26105 .sr(1)
26106 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026107 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026108 .k(k)
26109 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026110 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026111 }
26112 }
26113 }
26114
Frank Barchard0725b8d2020-12-07 11:07:35 -080026115 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026116 for (uint32_t n = 16; n <= 24; n += 8) {
26117 for (size_t k = 1; k <= 20; k += 5) {
26118 GemmMicrokernelTester()
26119 .mr(4)
26120 .nr(8)
26121 .kr(1)
26122 .sr(1)
26123 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026124 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026125 .k(k)
26126 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026127 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026128 }
26129 }
26130 }
26131
Frank Barchard0725b8d2020-12-07 11:07:35 -080026132 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026133 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026134 for (uint32_t n = 1; n <= 8; n++) {
26135 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026136 GemmMicrokernelTester()
26137 .mr(4)
26138 .nr(8)
26139 .kr(1)
26140 .sr(1)
26141 .m(m)
26142 .n(n)
26143 .k(k)
26144 .cm_stride(11)
26145 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026146 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026147 }
26148 }
26149 }
26150 }
26151
Frank Barchard0725b8d2020-12-07 11:07:35 -080026152 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, a_offset) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026153 for (size_t k = 1; k <= 20; k += 5) {
26154 GemmMicrokernelTester()
26155 .mr(4)
26156 .nr(8)
26157 .kr(1)
26158 .sr(1)
26159 .m(4)
26160 .n(8)
26161 .k(k)
26162 .ks(3)
26163 .a_offset(83)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026164 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026165 }
26166 }
26167
Frank Barchard0725b8d2020-12-07 11:07:35 -080026168 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026169 for (size_t k = 1; k <= 20; k += 5) {
26170 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026171 GemmMicrokernelTester()
26172 .mr(4)
26173 .nr(8)
26174 .kr(1)
26175 .sr(1)
26176 .m(4)
26177 .n(8)
26178 .k(k)
26179 .ks(3)
26180 .a_offset(83)
26181 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026182 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026183 }
26184 }
26185 }
26186
Frank Barchard0725b8d2020-12-07 11:07:35 -080026187 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026188 GemmMicrokernelTester()
26189 .mr(4)
26190 .nr(8)
26191 .kr(1)
26192 .sr(1)
26193 .m(4)
26194 .n(8)
26195 .k(4)
26196 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026197 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026198 }
26199
Frank Barchard0725b8d2020-12-07 11:07:35 -080026200 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026201 GemmMicrokernelTester()
26202 .mr(4)
26203 .nr(8)
26204 .kr(1)
26205 .sr(1)
26206 .m(4)
26207 .n(8)
26208 .k(4)
26209 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026210 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026211 }
26212
Frank Barchard0725b8d2020-12-07 11:07:35 -080026213 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026214 GemmMicrokernelTester()
26215 .mr(4)
26216 .nr(8)
26217 .kr(1)
26218 .sr(1)
26219 .m(4)
26220 .n(8)
26221 .k(4)
26222 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026223 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026224 }
Marat Dukhan4c617792021-12-21 15:47:58 -080026225#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026226
26227
Marat Dukhan4c617792021-12-21 15:47:58 -080026228#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -080026229 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026230 GemmMicrokernelTester()
26231 .mr(5)
26232 .nr(8)
26233 .kr(1)
26234 .sr(1)
26235 .m(5)
26236 .n(8)
26237 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026238 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026239 }
26240
Frank Barchard0725b8d2020-12-07 11:07:35 -080026241 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026242 GemmMicrokernelTester()
26243 .mr(5)
26244 .nr(8)
26245 .kr(1)
26246 .sr(1)
26247 .m(5)
26248 .n(8)
26249 .k(4)
26250 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026251 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026252 }
26253
Frank Barchard0725b8d2020-12-07 11:07:35 -080026254 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026255 for (uint32_t n = 1; n <= 8; n++) {
26256 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026257 GemmMicrokernelTester()
26258 .mr(5)
26259 .nr(8)
26260 .kr(1)
26261 .sr(1)
26262 .m(m)
26263 .n(n)
26264 .k(4)
26265 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026266 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026267 }
26268 }
26269 }
26270
Frank Barchard0725b8d2020-12-07 11:07:35 -080026271 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026272 for (uint32_t m = 1; m <= 5; m++) {
26273 GemmMicrokernelTester()
26274 .mr(5)
26275 .nr(8)
26276 .kr(1)
26277 .sr(1)
26278 .m(m)
26279 .n(8)
26280 .k(4)
26281 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026282 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026283 }
26284 }
26285
Frank Barchard0725b8d2020-12-07 11:07:35 -080026286 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026287 for (uint32_t n = 1; n <= 8; n++) {
26288 GemmMicrokernelTester()
26289 .mr(5)
26290 .nr(8)
26291 .kr(1)
26292 .sr(1)
26293 .m(5)
26294 .n(n)
26295 .k(4)
26296 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026297 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026298 }
26299 }
26300
Frank Barchard0725b8d2020-12-07 11:07:35 -080026301 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_lt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026302 for (size_t k = 1; k < 4; k++) {
26303 GemmMicrokernelTester()
26304 .mr(5)
26305 .nr(8)
26306 .kr(1)
26307 .sr(1)
26308 .m(5)
26309 .n(8)
26310 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026311 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026312 }
26313 }
26314
Frank Barchard0725b8d2020-12-07 11:07:35 -080026315 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026316 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026317 for (uint32_t n = 1; n <= 8; n++) {
26318 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026319 GemmMicrokernelTester()
26320 .mr(5)
26321 .nr(8)
26322 .kr(1)
26323 .sr(1)
26324 .m(m)
26325 .n(n)
26326 .k(k)
26327 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026328 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026329 }
26330 }
26331 }
26332 }
26333
Frank Barchard0725b8d2020-12-07 11:07:35 -080026334 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_gt_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026335 for (size_t k = 5; k < 8; k++) {
26336 GemmMicrokernelTester()
26337 .mr(5)
26338 .nr(8)
26339 .kr(1)
26340 .sr(1)
26341 .m(5)
26342 .n(8)
26343 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026344 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026345 }
26346 }
26347
Frank Barchard0725b8d2020-12-07 11:07:35 -080026348 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026349 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026350 for (uint32_t n = 1; n <= 8; n++) {
26351 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026352 GemmMicrokernelTester()
26353 .mr(5)
26354 .nr(8)
26355 .kr(1)
26356 .sr(1)
26357 .m(m)
26358 .n(n)
26359 .k(k)
26360 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026361 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026362 }
26363 }
26364 }
26365 }
26366
Frank Barchard0725b8d2020-12-07 11:07:35 -080026367 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_div_4) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026368 for (size_t k = 8; k <= 40; k += 4) {
26369 GemmMicrokernelTester()
26370 .mr(5)
26371 .nr(8)
26372 .kr(1)
26373 .sr(1)
26374 .m(5)
26375 .n(8)
26376 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026377 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026378 }
26379 }
26380
Frank Barchard0725b8d2020-12-07 11:07:35 -080026381 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026382 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026383 for (uint32_t n = 1; n <= 8; n++) {
26384 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026385 GemmMicrokernelTester()
26386 .mr(5)
26387 .nr(8)
26388 .kr(1)
26389 .sr(1)
26390 .m(m)
26391 .n(n)
26392 .k(k)
26393 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026394 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026395 }
26396 }
26397 }
26398 }
26399
Frank Barchard0725b8d2020-12-07 11:07:35 -080026400 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026401 for (uint32_t n = 9; n < 16; n++) {
26402 for (size_t k = 1; k <= 20; k += 5) {
26403 GemmMicrokernelTester()
26404 .mr(5)
26405 .nr(8)
26406 .kr(1)
26407 .sr(1)
26408 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026409 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026410 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026411 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026412 }
26413 }
26414 }
26415
Frank Barchard0725b8d2020-12-07 11:07:35 -080026416 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026417 for (uint32_t n = 9; n < 16; n++) {
26418 for (size_t k = 1; k <= 20; k += 5) {
26419 GemmMicrokernelTester()
26420 .mr(5)
26421 .nr(8)
26422 .kr(1)
26423 .sr(1)
26424 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026425 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026426 .k(k)
26427 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026428 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026429 }
26430 }
26431 }
26432
Frank Barchard0725b8d2020-12-07 11:07:35 -080026433 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026434 for (uint32_t n = 9; n < 16; n++) {
26435 for (size_t k = 1; k <= 20; k += 5) {
26436 for (uint32_t m = 1; m <= 5; m++) {
26437 GemmMicrokernelTester()
26438 .mr(5)
26439 .nr(8)
26440 .kr(1)
26441 .sr(1)
26442 .m(m)
26443 .n(n)
26444 .k(k)
26445 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026446 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026447 }
26448 }
26449 }
26450 }
26451
Frank Barchard0725b8d2020-12-07 11:07:35 -080026452 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026453 for (uint32_t n = 16; n <= 24; n += 8) {
26454 for (size_t k = 1; k <= 20; k += 5) {
26455 GemmMicrokernelTester()
26456 .mr(5)
26457 .nr(8)
26458 .kr(1)
26459 .sr(1)
26460 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026461 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026462 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026463 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026464 }
26465 }
26466 }
26467
Frank Barchard0725b8d2020-12-07 11:07:35 -080026468 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026469 for (uint32_t n = 16; n <= 24; n += 8) {
26470 for (size_t k = 1; k <= 20; k += 5) {
26471 GemmMicrokernelTester()
26472 .mr(5)
26473 .nr(8)
26474 .kr(1)
26475 .sr(1)
26476 .m(5)
26477 .n(n)
26478 .k(k)
26479 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026480 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026481 }
26482 }
26483 }
26484
Frank Barchard0725b8d2020-12-07 11:07:35 -080026485 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026486 for (uint32_t n = 16; n <= 24; n += 8) {
26487 for (size_t k = 1; k <= 20; k += 5) {
26488 for (uint32_t m = 1; m <= 5; m++) {
26489 GemmMicrokernelTester()
26490 .mr(5)
26491 .nr(8)
26492 .kr(1)
26493 .sr(1)
26494 .m(m)
26495 .n(n)
26496 .k(k)
26497 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026498 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026499 }
26500 }
26501 }
26502 }
26503
Frank Barchard0725b8d2020-12-07 11:07:35 -080026504 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026505 for (size_t k = 1; k <= 20; k += 5) {
26506 GemmMicrokernelTester()
26507 .mr(5)
26508 .nr(8)
26509 .kr(1)
26510 .sr(1)
26511 .m(5)
26512 .n(8)
26513 .k(k)
26514 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026515 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026516 }
26517 }
26518
Frank Barchard0725b8d2020-12-07 11:07:35 -080026519 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, small_kernel_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026520 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026521 for (uint32_t n = 1; n <= 8; n++) {
26522 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026523 GemmMicrokernelTester()
26524 .mr(5)
26525 .nr(8)
26526 .kr(1)
26527 .sr(1)
26528 .m(m)
26529 .n(n)
26530 .k(k)
26531 .ks(3)
26532 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026533 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026534 }
26535 }
26536 }
26537 }
26538
Frank Barchard0725b8d2020-12-07 11:07:35 -080026539 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_gt_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026540 for (uint32_t n = 9; n < 16; n++) {
26541 for (size_t k = 1; k <= 20; k += 5) {
26542 GemmMicrokernelTester()
26543 .mr(5)
26544 .nr(8)
26545 .kr(1)
26546 .sr(1)
26547 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026548 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026549 .k(k)
26550 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026551 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026552 }
26553 }
26554 }
26555
Frank Barchard0725b8d2020-12-07 11:07:35 -080026556 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, n_div_8_small_kernel) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026557 for (uint32_t n = 16; n <= 24; n += 8) {
26558 for (size_t k = 1; k <= 20; k += 5) {
26559 GemmMicrokernelTester()
26560 .mr(5)
26561 .nr(8)
26562 .kr(1)
26563 .sr(1)
26564 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026565 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026566 .k(k)
26567 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026568 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026569 }
26570 }
26571 }
26572
Frank Barchard0725b8d2020-12-07 11:07:35 -080026573 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026574 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026575 for (uint32_t n = 1; n <= 8; n++) {
26576 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026577 GemmMicrokernelTester()
26578 .mr(5)
26579 .nr(8)
26580 .kr(1)
26581 .sr(1)
26582 .m(m)
26583 .n(n)
26584 .k(k)
26585 .cm_stride(11)
26586 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026587 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026588 }
26589 }
26590 }
26591 }
26592
Frank Barchard0725b8d2020-12-07 11:07:35 -080026593 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, a_offset) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026594 for (size_t k = 1; k <= 20; k += 5) {
26595 GemmMicrokernelTester()
26596 .mr(5)
26597 .nr(8)
26598 .kr(1)
26599 .sr(1)
26600 .m(5)
26601 .n(8)
26602 .k(k)
26603 .ks(3)
26604 .a_offset(103)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026605 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026606 }
26607 }
26608
Frank Barchard0725b8d2020-12-07 11:07:35 -080026609 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026610 for (size_t k = 1; k <= 20; k += 5) {
26611 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026612 GemmMicrokernelTester()
26613 .mr(5)
26614 .nr(8)
26615 .kr(1)
26616 .sr(1)
26617 .m(5)
26618 .n(8)
26619 .k(k)
26620 .ks(3)
26621 .a_offset(103)
26622 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026623 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026624 }
26625 }
26626 }
26627
Frank Barchard0725b8d2020-12-07 11:07:35 -080026628 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, qmin) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026629 GemmMicrokernelTester()
26630 .mr(5)
26631 .nr(8)
26632 .kr(1)
26633 .sr(1)
26634 .m(5)
26635 .n(8)
26636 .k(4)
26637 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026638 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026639 }
26640
Frank Barchard0725b8d2020-12-07 11:07:35 -080026641 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, qmax) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026642 GemmMicrokernelTester()
26643 .mr(5)
26644 .nr(8)
26645 .kr(1)
26646 .sr(1)
26647 .m(5)
26648 .n(8)
26649 .k(4)
26650 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026651 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026652 }
26653
Frank Barchard0725b8d2020-12-07 11:07:35 -080026654 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_X86_SPLAT, strided_cm) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026655 GemmMicrokernelTester()
26656 .mr(5)
26657 .nr(8)
26658 .kr(1)
26659 .sr(1)
26660 .m(5)
26661 .n(8)
26662 .k(4)
26663 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026664 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026665 }
Marat Dukhan4c617792021-12-21 15:47:58 -080026666#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026667
26668
Marat Dukhan4c617792021-12-21 15:47:58 -080026669#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026670 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4) {
26671 GemmMicrokernelTester()
26672 .mr(1)
26673 .nr(8)
26674 .kr(1)
26675 .sr(4)
26676 .m(1)
26677 .n(8)
26678 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026679 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026680 }
26681
26682 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, strided_cn) {
26683 GemmMicrokernelTester()
26684 .mr(1)
26685 .nr(8)
26686 .kr(1)
26687 .sr(4)
26688 .m(1)
26689 .n(8)
26690 .k(4)
26691 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026692 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026693 }
26694
26695 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026696 for (uint32_t n = 1; n <= 8; n++) {
26697 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026698 GemmMicrokernelTester()
26699 .mr(1)
26700 .nr(8)
26701 .kr(1)
26702 .sr(4)
26703 .m(m)
26704 .n(n)
26705 .k(4)
26706 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026707 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026708 }
26709 }
26710 }
26711
26712 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
26713 for (uint32_t m = 1; m <= 1; m++) {
26714 GemmMicrokernelTester()
26715 .mr(1)
26716 .nr(8)
26717 .kr(1)
26718 .sr(4)
26719 .m(m)
26720 .n(8)
26721 .k(4)
26722 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026723 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026724 }
26725 }
26726
26727 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
26728 for (uint32_t n = 1; n <= 8; n++) {
26729 GemmMicrokernelTester()
26730 .mr(1)
26731 .nr(8)
26732 .kr(1)
26733 .sr(4)
26734 .m(1)
26735 .n(n)
26736 .k(4)
26737 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026738 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026739 }
26740 }
26741
26742 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_lt_4) {
26743 for (size_t k = 1; k < 4; k++) {
26744 GemmMicrokernelTester()
26745 .mr(1)
26746 .nr(8)
26747 .kr(1)
26748 .sr(4)
26749 .m(1)
26750 .n(8)
26751 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026752 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026753 }
26754 }
26755
26756 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
26757 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026758 for (uint32_t n = 1; n <= 8; n++) {
26759 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026760 GemmMicrokernelTester()
26761 .mr(1)
26762 .nr(8)
26763 .kr(1)
26764 .sr(4)
26765 .m(m)
26766 .n(n)
26767 .k(k)
26768 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026769 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026770 }
26771 }
26772 }
26773 }
26774
26775 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_gt_4) {
26776 for (size_t k = 5; k < 8; k++) {
26777 GemmMicrokernelTester()
26778 .mr(1)
26779 .nr(8)
26780 .kr(1)
26781 .sr(4)
26782 .m(1)
26783 .n(8)
26784 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026785 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026786 }
26787 }
26788
26789 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
26790 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026791 for (uint32_t n = 1; n <= 8; n++) {
26792 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026793 GemmMicrokernelTester()
26794 .mr(1)
26795 .nr(8)
26796 .kr(1)
26797 .sr(4)
26798 .m(m)
26799 .n(n)
26800 .k(k)
26801 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026802 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026803 }
26804 }
26805 }
26806 }
26807
26808 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_div_4) {
26809 for (size_t k = 8; k <= 40; k += 4) {
26810 GemmMicrokernelTester()
26811 .mr(1)
26812 .nr(8)
26813 .kr(1)
26814 .sr(4)
26815 .m(1)
26816 .n(8)
26817 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026818 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026819 }
26820 }
26821
26822 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, k_div_4_subtile) {
26823 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026824 for (uint32_t n = 1; n <= 8; n++) {
26825 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026826 GemmMicrokernelTester()
26827 .mr(1)
26828 .nr(8)
26829 .kr(1)
26830 .sr(4)
26831 .m(m)
26832 .n(n)
26833 .k(k)
26834 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026835 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026836 }
26837 }
26838 }
26839 }
26840
26841 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8) {
26842 for (uint32_t n = 9; n < 16; n++) {
26843 for (size_t k = 1; k <= 20; k += 5) {
26844 GemmMicrokernelTester()
26845 .mr(1)
26846 .nr(8)
26847 .kr(1)
26848 .sr(4)
26849 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026850 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026851 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026852 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026853 }
26854 }
26855 }
26856
26857 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
26858 for (uint32_t n = 9; n < 16; n++) {
26859 for (size_t k = 1; k <= 20; k += 5) {
26860 GemmMicrokernelTester()
26861 .mr(1)
26862 .nr(8)
26863 .kr(1)
26864 .sr(4)
26865 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026866 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026867 .k(k)
26868 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026869 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026870 }
26871 }
26872 }
26873
26874 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
26875 for (uint32_t n = 9; n < 16; n++) {
26876 for (size_t k = 1; k <= 20; k += 5) {
26877 for (uint32_t m = 1; m <= 1; m++) {
26878 GemmMicrokernelTester()
26879 .mr(1)
26880 .nr(8)
26881 .kr(1)
26882 .sr(4)
26883 .m(m)
26884 .n(n)
26885 .k(k)
26886 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026887 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026888 }
26889 }
26890 }
26891 }
26892
26893 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8) {
26894 for (uint32_t n = 16; n <= 24; n += 8) {
26895 for (size_t k = 1; k <= 20; k += 5) {
26896 GemmMicrokernelTester()
26897 .mr(1)
26898 .nr(8)
26899 .kr(1)
26900 .sr(4)
26901 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026902 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026903 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026904 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026905 }
26906 }
26907 }
26908
26909 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
26910 for (uint32_t n = 16; n <= 24; n += 8) {
26911 for (size_t k = 1; k <= 20; k += 5) {
26912 GemmMicrokernelTester()
26913 .mr(1)
26914 .nr(8)
26915 .kr(1)
26916 .sr(4)
26917 .m(1)
26918 .n(n)
26919 .k(k)
26920 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026921 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026922 }
26923 }
26924 }
26925
26926 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_subtile) {
26927 for (uint32_t n = 16; n <= 24; n += 8) {
26928 for (size_t k = 1; k <= 20; k += 5) {
26929 for (uint32_t m = 1; m <= 1; m++) {
26930 GemmMicrokernelTester()
26931 .mr(1)
26932 .nr(8)
26933 .kr(1)
26934 .sr(4)
26935 .m(m)
26936 .n(n)
26937 .k(k)
26938 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026939 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026940 }
26941 }
26942 }
26943 }
26944
26945 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, small_kernel) {
26946 for (size_t k = 1; k <= 20; k += 5) {
26947 GemmMicrokernelTester()
26948 .mr(1)
26949 .nr(8)
26950 .kr(1)
26951 .sr(4)
26952 .m(1)
26953 .n(8)
26954 .k(k)
26955 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026956 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026957 }
26958 }
26959
26960 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, small_kernel_subtile) {
26961 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026962 for (uint32_t n = 1; n <= 8; n++) {
26963 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026964 GemmMicrokernelTester()
26965 .mr(1)
26966 .nr(8)
26967 .kr(1)
26968 .sr(4)
26969 .m(m)
26970 .n(n)
26971 .k(k)
26972 .ks(3)
26973 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026974 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026975 }
26976 }
26977 }
26978 }
26979
26980 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_gt_8_small_kernel) {
26981 for (uint32_t n = 9; n < 16; n++) {
26982 for (size_t k = 1; k <= 20; k += 5) {
26983 GemmMicrokernelTester()
26984 .mr(1)
26985 .nr(8)
26986 .kr(1)
26987 .sr(4)
26988 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026989 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026990 .k(k)
26991 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080026992 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070026993 }
26994 }
26995 }
26996
26997 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, n_div_8_small_kernel) {
26998 for (uint32_t n = 16; n <= 24; n += 8) {
26999 for (size_t k = 1; k <= 20; k += 5) {
27000 GemmMicrokernelTester()
27001 .mr(1)
27002 .nr(8)
27003 .kr(1)
27004 .sr(4)
27005 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027006 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027007 .k(k)
27008 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027009 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027010 }
27011 }
27012 }
27013
27014 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, strided_cm_subtile) {
27015 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027016 for (uint32_t n = 1; n <= 8; n++) {
27017 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027018 GemmMicrokernelTester()
27019 .mr(1)
27020 .nr(8)
27021 .kr(1)
27022 .sr(4)
27023 .m(m)
27024 .n(n)
27025 .k(k)
27026 .cm_stride(11)
27027 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027028 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027029 }
27030 }
27031 }
27032 }
27033
27034 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, a_offset) {
27035 for (size_t k = 1; k <= 20; k += 5) {
27036 GemmMicrokernelTester()
27037 .mr(1)
27038 .nr(8)
27039 .kr(1)
27040 .sr(4)
27041 .m(1)
27042 .n(8)
27043 .k(k)
27044 .ks(3)
27045 .a_offset(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027046 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027047 }
27048 }
27049
27050 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027051 for (size_t k = 1; k <= 20; k += 5) {
27052 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027053 GemmMicrokernelTester()
27054 .mr(1)
27055 .nr(8)
27056 .kr(1)
27057 .sr(4)
27058 .m(1)
27059 .n(8)
27060 .k(k)
27061 .ks(3)
27062 .a_offset(23)
27063 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027064 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027065 }
27066 }
27067 }
27068
27069 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, qmin) {
27070 GemmMicrokernelTester()
27071 .mr(1)
27072 .nr(8)
27073 .kr(1)
27074 .sr(4)
27075 .m(1)
27076 .n(8)
27077 .k(4)
27078 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027079 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027080 }
27081
27082 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, qmax) {
27083 GemmMicrokernelTester()
27084 .mr(1)
27085 .nr(8)
27086 .kr(1)
27087 .sr(4)
27088 .m(1)
27089 .n(8)
27090 .k(4)
27091 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027092 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027093 }
27094
27095 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_ARM, strided_cm) {
27096 GemmMicrokernelTester()
27097 .mr(1)
27098 .nr(8)
27099 .kr(1)
27100 .sr(4)
27101 .m(1)
27102 .n(8)
27103 .k(4)
27104 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027105 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027106 }
Marat Dukhan4c617792021-12-21 15:47:58 -080027107#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027108
27109
Marat Dukhan4c617792021-12-21 15:47:58 -080027110#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027111 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4) {
27112 GemmMicrokernelTester()
27113 .mr(4)
27114 .nr(8)
27115 .kr(1)
27116 .sr(4)
27117 .m(4)
27118 .n(8)
27119 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027120 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027121 }
27122
27123 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, strided_cn) {
27124 GemmMicrokernelTester()
27125 .mr(4)
27126 .nr(8)
27127 .kr(1)
27128 .sr(4)
27129 .m(4)
27130 .n(8)
27131 .k(4)
27132 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027133 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027134 }
27135
27136 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027137 for (uint32_t n = 1; n <= 8; n++) {
27138 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027139 GemmMicrokernelTester()
27140 .mr(4)
27141 .nr(8)
27142 .kr(1)
27143 .sr(4)
27144 .m(m)
27145 .n(n)
27146 .k(4)
27147 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027148 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027149 }
27150 }
27151 }
27152
27153 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
27154 for (uint32_t m = 1; m <= 4; m++) {
27155 GemmMicrokernelTester()
27156 .mr(4)
27157 .nr(8)
27158 .kr(1)
27159 .sr(4)
27160 .m(m)
27161 .n(8)
27162 .k(4)
27163 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027164 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027165 }
27166 }
27167
27168 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
27169 for (uint32_t n = 1; n <= 8; n++) {
27170 GemmMicrokernelTester()
27171 .mr(4)
27172 .nr(8)
27173 .kr(1)
27174 .sr(4)
27175 .m(4)
27176 .n(n)
27177 .k(4)
27178 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027179 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027180 }
27181 }
27182
27183 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_lt_4) {
27184 for (size_t k = 1; k < 4; k++) {
27185 GemmMicrokernelTester()
27186 .mr(4)
27187 .nr(8)
27188 .kr(1)
27189 .sr(4)
27190 .m(4)
27191 .n(8)
27192 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027193 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027194 }
27195 }
27196
27197 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
27198 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027199 for (uint32_t n = 1; n <= 8; n++) {
27200 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027201 GemmMicrokernelTester()
27202 .mr(4)
27203 .nr(8)
27204 .kr(1)
27205 .sr(4)
27206 .m(m)
27207 .n(n)
27208 .k(k)
27209 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027210 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027211 }
27212 }
27213 }
27214 }
27215
27216 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_gt_4) {
27217 for (size_t k = 5; k < 8; k++) {
27218 GemmMicrokernelTester()
27219 .mr(4)
27220 .nr(8)
27221 .kr(1)
27222 .sr(4)
27223 .m(4)
27224 .n(8)
27225 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027226 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027227 }
27228 }
27229
27230 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
27231 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027232 for (uint32_t n = 1; n <= 8; n++) {
27233 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027234 GemmMicrokernelTester()
27235 .mr(4)
27236 .nr(8)
27237 .kr(1)
27238 .sr(4)
27239 .m(m)
27240 .n(n)
27241 .k(k)
27242 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027243 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027244 }
27245 }
27246 }
27247 }
27248
27249 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_div_4) {
27250 for (size_t k = 8; k <= 40; k += 4) {
27251 GemmMicrokernelTester()
27252 .mr(4)
27253 .nr(8)
27254 .kr(1)
27255 .sr(4)
27256 .m(4)
27257 .n(8)
27258 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027259 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027260 }
27261 }
27262
27263 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, k_div_4_subtile) {
27264 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027265 for (uint32_t n = 1; n <= 8; n++) {
27266 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027267 GemmMicrokernelTester()
27268 .mr(4)
27269 .nr(8)
27270 .kr(1)
27271 .sr(4)
27272 .m(m)
27273 .n(n)
27274 .k(k)
27275 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027276 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027277 }
27278 }
27279 }
27280 }
27281
27282 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8) {
27283 for (uint32_t n = 9; n < 16; n++) {
27284 for (size_t k = 1; k <= 20; k += 5) {
27285 GemmMicrokernelTester()
27286 .mr(4)
27287 .nr(8)
27288 .kr(1)
27289 .sr(4)
27290 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027291 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027292 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027293 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027294 }
27295 }
27296 }
27297
27298 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
27299 for (uint32_t n = 9; n < 16; n++) {
27300 for (size_t k = 1; k <= 20; k += 5) {
27301 GemmMicrokernelTester()
27302 .mr(4)
27303 .nr(8)
27304 .kr(1)
27305 .sr(4)
27306 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027307 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027308 .k(k)
27309 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027310 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027311 }
27312 }
27313 }
27314
27315 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
27316 for (uint32_t n = 9; n < 16; n++) {
27317 for (size_t k = 1; k <= 20; k += 5) {
27318 for (uint32_t m = 1; m <= 4; m++) {
27319 GemmMicrokernelTester()
27320 .mr(4)
27321 .nr(8)
27322 .kr(1)
27323 .sr(4)
27324 .m(m)
27325 .n(n)
27326 .k(k)
27327 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027328 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027329 }
27330 }
27331 }
27332 }
27333
27334 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8) {
27335 for (uint32_t n = 16; n <= 24; n += 8) {
27336 for (size_t k = 1; k <= 20; k += 5) {
27337 GemmMicrokernelTester()
27338 .mr(4)
27339 .nr(8)
27340 .kr(1)
27341 .sr(4)
27342 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027343 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027344 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027345 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027346 }
27347 }
27348 }
27349
27350 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
27351 for (uint32_t n = 16; n <= 24; n += 8) {
27352 for (size_t k = 1; k <= 20; k += 5) {
27353 GemmMicrokernelTester()
27354 .mr(4)
27355 .nr(8)
27356 .kr(1)
27357 .sr(4)
27358 .m(4)
27359 .n(n)
27360 .k(k)
27361 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027362 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027363 }
27364 }
27365 }
27366
27367 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_subtile) {
27368 for (uint32_t n = 16; n <= 24; n += 8) {
27369 for (size_t k = 1; k <= 20; k += 5) {
27370 for (uint32_t m = 1; m <= 4; m++) {
27371 GemmMicrokernelTester()
27372 .mr(4)
27373 .nr(8)
27374 .kr(1)
27375 .sr(4)
27376 .m(m)
27377 .n(n)
27378 .k(k)
27379 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027380 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027381 }
27382 }
27383 }
27384 }
27385
27386 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, small_kernel) {
27387 for (size_t k = 1; k <= 20; k += 5) {
27388 GemmMicrokernelTester()
27389 .mr(4)
27390 .nr(8)
27391 .kr(1)
27392 .sr(4)
27393 .m(4)
27394 .n(8)
27395 .k(k)
27396 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027397 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027398 }
27399 }
27400
27401 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, small_kernel_subtile) {
27402 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027403 for (uint32_t n = 1; n <= 8; n++) {
27404 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027405 GemmMicrokernelTester()
27406 .mr(4)
27407 .nr(8)
27408 .kr(1)
27409 .sr(4)
27410 .m(m)
27411 .n(n)
27412 .k(k)
27413 .ks(3)
27414 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027415 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027416 }
27417 }
27418 }
27419 }
27420
27421 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_gt_8_small_kernel) {
27422 for (uint32_t n = 9; n < 16; n++) {
27423 for (size_t k = 1; k <= 20; k += 5) {
27424 GemmMicrokernelTester()
27425 .mr(4)
27426 .nr(8)
27427 .kr(1)
27428 .sr(4)
27429 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027430 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027431 .k(k)
27432 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027433 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027434 }
27435 }
27436 }
27437
27438 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, n_div_8_small_kernel) {
27439 for (uint32_t n = 16; n <= 24; n += 8) {
27440 for (size_t k = 1; k <= 20; k += 5) {
27441 GemmMicrokernelTester()
27442 .mr(4)
27443 .nr(8)
27444 .kr(1)
27445 .sr(4)
27446 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027447 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027448 .k(k)
27449 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027450 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027451 }
27452 }
27453 }
27454
27455 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, strided_cm_subtile) {
27456 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027457 for (uint32_t n = 1; n <= 8; n++) {
27458 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027459 GemmMicrokernelTester()
27460 .mr(4)
27461 .nr(8)
27462 .kr(1)
27463 .sr(4)
27464 .m(m)
27465 .n(n)
27466 .k(k)
27467 .cm_stride(11)
27468 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027469 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027470 }
27471 }
27472 }
27473 }
27474
27475 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, a_offset) {
27476 for (size_t k = 1; k <= 20; k += 5) {
27477 GemmMicrokernelTester()
27478 .mr(4)
27479 .nr(8)
27480 .kr(1)
27481 .sr(4)
27482 .m(4)
27483 .n(8)
27484 .k(k)
27485 .ks(3)
27486 .a_offset(83)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027487 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027488 }
27489 }
27490
27491 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027492 for (size_t k = 1; k <= 20; k += 5) {
27493 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027494 GemmMicrokernelTester()
27495 .mr(4)
27496 .nr(8)
27497 .kr(1)
27498 .sr(4)
27499 .m(4)
27500 .n(8)
27501 .k(k)
27502 .ks(3)
27503 .a_offset(83)
27504 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027505 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027506 }
27507 }
27508 }
27509
27510 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, qmin) {
27511 GemmMicrokernelTester()
27512 .mr(4)
27513 .nr(8)
27514 .kr(1)
27515 .sr(4)
27516 .m(4)
27517 .n(8)
27518 .k(4)
27519 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027520 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027521 }
27522
27523 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, qmax) {
27524 GemmMicrokernelTester()
27525 .mr(4)
27526 .nr(8)
27527 .kr(1)
27528 .sr(4)
27529 .m(4)
27530 .n(8)
27531 .k(4)
27532 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027533 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027534 }
27535
27536 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_ARM, strided_cm) {
27537 GemmMicrokernelTester()
27538 .mr(4)
27539 .nr(8)
27540 .kr(1)
27541 .sr(4)
27542 .m(4)
27543 .n(8)
27544 .k(4)
27545 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027546 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027547 }
Marat Dukhan4c617792021-12-21 15:47:58 -080027548#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027549
27550
Marat Dukhan4c617792021-12-21 15:47:58 -080027551#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027552 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4) {
27553 GemmMicrokernelTester()
27554 .mr(5)
27555 .nr(8)
27556 .kr(1)
27557 .sr(4)
27558 .m(5)
27559 .n(8)
27560 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027561 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027562 }
27563
27564 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, strided_cn) {
27565 GemmMicrokernelTester()
27566 .mr(5)
27567 .nr(8)
27568 .kr(1)
27569 .sr(4)
27570 .m(5)
27571 .n(8)
27572 .k(4)
27573 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027574 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027575 }
27576
27577 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027578 for (uint32_t n = 1; n <= 8; n++) {
27579 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027580 GemmMicrokernelTester()
27581 .mr(5)
27582 .nr(8)
27583 .kr(1)
27584 .sr(4)
27585 .m(m)
27586 .n(n)
27587 .k(4)
27588 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027589 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027590 }
27591 }
27592 }
27593
27594 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
27595 for (uint32_t m = 1; m <= 5; m++) {
27596 GemmMicrokernelTester()
27597 .mr(5)
27598 .nr(8)
27599 .kr(1)
27600 .sr(4)
27601 .m(m)
27602 .n(8)
27603 .k(4)
27604 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027605 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027606 }
27607 }
27608
27609 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
27610 for (uint32_t n = 1; n <= 8; n++) {
27611 GemmMicrokernelTester()
27612 .mr(5)
27613 .nr(8)
27614 .kr(1)
27615 .sr(4)
27616 .m(5)
27617 .n(n)
27618 .k(4)
27619 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027620 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027621 }
27622 }
27623
27624 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_lt_4) {
27625 for (size_t k = 1; k < 4; k++) {
27626 GemmMicrokernelTester()
27627 .mr(5)
27628 .nr(8)
27629 .kr(1)
27630 .sr(4)
27631 .m(5)
27632 .n(8)
27633 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027634 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027635 }
27636 }
27637
27638 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
27639 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027640 for (uint32_t n = 1; n <= 8; n++) {
27641 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027642 GemmMicrokernelTester()
27643 .mr(5)
27644 .nr(8)
27645 .kr(1)
27646 .sr(4)
27647 .m(m)
27648 .n(n)
27649 .k(k)
27650 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027651 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027652 }
27653 }
27654 }
27655 }
27656
27657 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_gt_4) {
27658 for (size_t k = 5; k < 8; k++) {
27659 GemmMicrokernelTester()
27660 .mr(5)
27661 .nr(8)
27662 .kr(1)
27663 .sr(4)
27664 .m(5)
27665 .n(8)
27666 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027667 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027668 }
27669 }
27670
27671 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
27672 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027673 for (uint32_t n = 1; n <= 8; n++) {
27674 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027675 GemmMicrokernelTester()
27676 .mr(5)
27677 .nr(8)
27678 .kr(1)
27679 .sr(4)
27680 .m(m)
27681 .n(n)
27682 .k(k)
27683 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027684 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027685 }
27686 }
27687 }
27688 }
27689
27690 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_div_4) {
27691 for (size_t k = 8; k <= 40; k += 4) {
27692 GemmMicrokernelTester()
27693 .mr(5)
27694 .nr(8)
27695 .kr(1)
27696 .sr(4)
27697 .m(5)
27698 .n(8)
27699 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027700 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027701 }
27702 }
27703
27704 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, k_div_4_subtile) {
27705 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027706 for (uint32_t n = 1; n <= 8; n++) {
27707 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027708 GemmMicrokernelTester()
27709 .mr(5)
27710 .nr(8)
27711 .kr(1)
27712 .sr(4)
27713 .m(m)
27714 .n(n)
27715 .k(k)
27716 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027717 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027718 }
27719 }
27720 }
27721 }
27722
27723 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8) {
27724 for (uint32_t n = 9; n < 16; n++) {
27725 for (size_t k = 1; k <= 20; k += 5) {
27726 GemmMicrokernelTester()
27727 .mr(5)
27728 .nr(8)
27729 .kr(1)
27730 .sr(4)
27731 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027732 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027733 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027734 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027735 }
27736 }
27737 }
27738
27739 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
27740 for (uint32_t n = 9; n < 16; n++) {
27741 for (size_t k = 1; k <= 20; k += 5) {
27742 GemmMicrokernelTester()
27743 .mr(5)
27744 .nr(8)
27745 .kr(1)
27746 .sr(4)
27747 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027748 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027749 .k(k)
27750 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027751 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027752 }
27753 }
27754 }
27755
27756 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
27757 for (uint32_t n = 9; n < 16; n++) {
27758 for (size_t k = 1; k <= 20; k += 5) {
27759 for (uint32_t m = 1; m <= 5; m++) {
27760 GemmMicrokernelTester()
27761 .mr(5)
27762 .nr(8)
27763 .kr(1)
27764 .sr(4)
27765 .m(m)
27766 .n(n)
27767 .k(k)
27768 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027769 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027770 }
27771 }
27772 }
27773 }
27774
27775 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8) {
27776 for (uint32_t n = 16; n <= 24; n += 8) {
27777 for (size_t k = 1; k <= 20; k += 5) {
27778 GemmMicrokernelTester()
27779 .mr(5)
27780 .nr(8)
27781 .kr(1)
27782 .sr(4)
27783 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027784 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027785 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027786 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027787 }
27788 }
27789 }
27790
27791 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
27792 for (uint32_t n = 16; n <= 24; n += 8) {
27793 for (size_t k = 1; k <= 20; k += 5) {
27794 GemmMicrokernelTester()
27795 .mr(5)
27796 .nr(8)
27797 .kr(1)
27798 .sr(4)
27799 .m(5)
27800 .n(n)
27801 .k(k)
27802 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027803 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027804 }
27805 }
27806 }
27807
27808 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_subtile) {
27809 for (uint32_t n = 16; n <= 24; n += 8) {
27810 for (size_t k = 1; k <= 20; k += 5) {
27811 for (uint32_t m = 1; m <= 5; m++) {
27812 GemmMicrokernelTester()
27813 .mr(5)
27814 .nr(8)
27815 .kr(1)
27816 .sr(4)
27817 .m(m)
27818 .n(n)
27819 .k(k)
27820 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027821 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027822 }
27823 }
27824 }
27825 }
27826
27827 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, small_kernel) {
27828 for (size_t k = 1; k <= 20; k += 5) {
27829 GemmMicrokernelTester()
27830 .mr(5)
27831 .nr(8)
27832 .kr(1)
27833 .sr(4)
27834 .m(5)
27835 .n(8)
27836 .k(k)
27837 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027838 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027839 }
27840 }
27841
27842 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, small_kernel_subtile) {
27843 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027844 for (uint32_t n = 1; n <= 8; n++) {
27845 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027846 GemmMicrokernelTester()
27847 .mr(5)
27848 .nr(8)
27849 .kr(1)
27850 .sr(4)
27851 .m(m)
27852 .n(n)
27853 .k(k)
27854 .ks(3)
27855 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027856 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027857 }
27858 }
27859 }
27860 }
27861
27862 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_gt_8_small_kernel) {
27863 for (uint32_t n = 9; n < 16; n++) {
27864 for (size_t k = 1; k <= 20; k += 5) {
27865 GemmMicrokernelTester()
27866 .mr(5)
27867 .nr(8)
27868 .kr(1)
27869 .sr(4)
27870 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027871 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027872 .k(k)
27873 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027874 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027875 }
27876 }
27877 }
27878
27879 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, n_div_8_small_kernel) {
27880 for (uint32_t n = 16; n <= 24; n += 8) {
27881 for (size_t k = 1; k <= 20; k += 5) {
27882 GemmMicrokernelTester()
27883 .mr(5)
27884 .nr(8)
27885 .kr(1)
27886 .sr(4)
27887 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027888 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027889 .k(k)
27890 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027891 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027892 }
27893 }
27894 }
27895
27896 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, strided_cm_subtile) {
27897 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027898 for (uint32_t n = 1; n <= 8; n++) {
27899 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027900 GemmMicrokernelTester()
27901 .mr(5)
27902 .nr(8)
27903 .kr(1)
27904 .sr(4)
27905 .m(m)
27906 .n(n)
27907 .k(k)
27908 .cm_stride(11)
27909 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027910 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027911 }
27912 }
27913 }
27914 }
27915
27916 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, a_offset) {
27917 for (size_t k = 1; k <= 20; k += 5) {
27918 GemmMicrokernelTester()
27919 .mr(5)
27920 .nr(8)
27921 .kr(1)
27922 .sr(4)
27923 .m(5)
27924 .n(8)
27925 .k(k)
27926 .ks(3)
27927 .a_offset(103)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027928 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027929 }
27930 }
27931
27932 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027933 for (size_t k = 1; k <= 20; k += 5) {
27934 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027935 GemmMicrokernelTester()
27936 .mr(5)
27937 .nr(8)
27938 .kr(1)
27939 .sr(4)
27940 .m(5)
27941 .n(8)
27942 .k(k)
27943 .ks(3)
27944 .a_offset(103)
27945 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027946 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027947 }
27948 }
27949 }
27950
27951 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, qmin) {
27952 GemmMicrokernelTester()
27953 .mr(5)
27954 .nr(8)
27955 .kr(1)
27956 .sr(4)
27957 .m(5)
27958 .n(8)
27959 .k(4)
27960 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027961 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027962 }
27963
27964 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, qmax) {
27965 GemmMicrokernelTester()
27966 .mr(5)
27967 .nr(8)
27968 .kr(1)
27969 .sr(4)
27970 .m(5)
27971 .n(8)
27972 .k(4)
27973 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027974 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027975 }
27976
27977 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_ARM, strided_cm) {
27978 GemmMicrokernelTester()
27979 .mr(5)
27980 .nr(8)
27981 .kr(1)
27982 .sr(4)
27983 .m(5)
27984 .n(8)
27985 .k(4)
27986 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080027987 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027988 }
Marat Dukhan4c617792021-12-21 15:47:58 -080027989#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027990
27991
Marat Dukhan4c617792021-12-21 15:47:58 -080027992#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070027993 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4) {
27994 GemmMicrokernelTester()
27995 .mr(1)
27996 .nr(8)
27997 .kr(1)
27998 .sr(4)
27999 .m(1)
28000 .n(8)
28001 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028002 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028003 }
28004
28005 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, strided_cn) {
28006 GemmMicrokernelTester()
28007 .mr(1)
28008 .nr(8)
28009 .kr(1)
28010 .sr(4)
28011 .m(1)
28012 .n(8)
28013 .k(4)
28014 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028015 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028016 }
28017
28018 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028019 for (uint32_t n = 1; n <= 8; n++) {
28020 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028021 GemmMicrokernelTester()
28022 .mr(1)
28023 .nr(8)
28024 .kr(1)
28025 .sr(4)
28026 .m(m)
28027 .n(n)
28028 .k(4)
28029 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028030 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028031 }
28032 }
28033 }
28034
28035 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
28036 for (uint32_t m = 1; m <= 1; m++) {
28037 GemmMicrokernelTester()
28038 .mr(1)
28039 .nr(8)
28040 .kr(1)
28041 .sr(4)
28042 .m(m)
28043 .n(8)
28044 .k(4)
28045 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028046 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028047 }
28048 }
28049
28050 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
28051 for (uint32_t n = 1; n <= 8; n++) {
28052 GemmMicrokernelTester()
28053 .mr(1)
28054 .nr(8)
28055 .kr(1)
28056 .sr(4)
28057 .m(1)
28058 .n(n)
28059 .k(4)
28060 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028061 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028062 }
28063 }
28064
28065 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_lt_4) {
28066 for (size_t k = 1; k < 4; k++) {
28067 GemmMicrokernelTester()
28068 .mr(1)
28069 .nr(8)
28070 .kr(1)
28071 .sr(4)
28072 .m(1)
28073 .n(8)
28074 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028075 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028076 }
28077 }
28078
28079 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_lt_4_subtile) {
28080 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028081 for (uint32_t n = 1; n <= 8; n++) {
28082 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028083 GemmMicrokernelTester()
28084 .mr(1)
28085 .nr(8)
28086 .kr(1)
28087 .sr(4)
28088 .m(m)
28089 .n(n)
28090 .k(k)
28091 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028092 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028093 }
28094 }
28095 }
28096 }
28097
28098 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_gt_4) {
28099 for (size_t k = 5; k < 8; k++) {
28100 GemmMicrokernelTester()
28101 .mr(1)
28102 .nr(8)
28103 .kr(1)
28104 .sr(4)
28105 .m(1)
28106 .n(8)
28107 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028108 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028109 }
28110 }
28111
28112 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_gt_4_subtile) {
28113 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028114 for (uint32_t n = 1; n <= 8; n++) {
28115 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028116 GemmMicrokernelTester()
28117 .mr(1)
28118 .nr(8)
28119 .kr(1)
28120 .sr(4)
28121 .m(m)
28122 .n(n)
28123 .k(k)
28124 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028125 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028126 }
28127 }
28128 }
28129 }
28130
28131 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_div_4) {
28132 for (size_t k = 8; k <= 40; k += 4) {
28133 GemmMicrokernelTester()
28134 .mr(1)
28135 .nr(8)
28136 .kr(1)
28137 .sr(4)
28138 .m(1)
28139 .n(8)
28140 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028141 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028142 }
28143 }
28144
28145 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, k_div_4_subtile) {
28146 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028147 for (uint32_t n = 1; n <= 8; n++) {
28148 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028149 GemmMicrokernelTester()
28150 .mr(1)
28151 .nr(8)
28152 .kr(1)
28153 .sr(4)
28154 .m(m)
28155 .n(n)
28156 .k(k)
28157 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028158 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028159 }
28160 }
28161 }
28162 }
28163
28164 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8) {
28165 for (uint32_t n = 9; n < 16; n++) {
28166 for (size_t k = 1; k <= 20; k += 5) {
28167 GemmMicrokernelTester()
28168 .mr(1)
28169 .nr(8)
28170 .kr(1)
28171 .sr(4)
28172 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028173 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028174 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028175 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028176 }
28177 }
28178 }
28179
28180 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
28181 for (uint32_t n = 9; n < 16; n++) {
28182 for (size_t k = 1; k <= 20; k += 5) {
28183 GemmMicrokernelTester()
28184 .mr(1)
28185 .nr(8)
28186 .kr(1)
28187 .sr(4)
28188 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028189 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028190 .k(k)
28191 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028192 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028193 }
28194 }
28195 }
28196
28197 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_subtile) {
28198 for (uint32_t n = 9; n < 16; n++) {
28199 for (size_t k = 1; k <= 20; k += 5) {
28200 for (uint32_t m = 1; m <= 1; m++) {
28201 GemmMicrokernelTester()
28202 .mr(1)
28203 .nr(8)
28204 .kr(1)
28205 .sr(4)
28206 .m(m)
28207 .n(n)
28208 .k(k)
28209 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028210 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028211 }
28212 }
28213 }
28214 }
28215
28216 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_div_8) {
28217 for (uint32_t n = 16; n <= 24; n += 8) {
28218 for (size_t k = 1; k <= 20; k += 5) {
28219 GemmMicrokernelTester()
28220 .mr(1)
28221 .nr(8)
28222 .kr(1)
28223 .sr(4)
28224 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028225 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028226 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028227 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028228 }
28229 }
28230 }
28231
28232 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
28233 for (uint32_t n = 16; n <= 24; n += 8) {
28234 for (size_t k = 1; k <= 20; k += 5) {
28235 GemmMicrokernelTester()
28236 .mr(1)
28237 .nr(8)
28238 .kr(1)
28239 .sr(4)
28240 .m(1)
28241 .n(n)
28242 .k(k)
28243 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028244 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028245 }
28246 }
28247 }
28248
28249 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_subtile) {
28250 for (uint32_t n = 16; n <= 24; n += 8) {
28251 for (size_t k = 1; k <= 20; k += 5) {
28252 for (uint32_t m = 1; m <= 1; m++) {
28253 GemmMicrokernelTester()
28254 .mr(1)
28255 .nr(8)
28256 .kr(1)
28257 .sr(4)
28258 .m(m)
28259 .n(n)
28260 .k(k)
28261 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028262 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028263 }
28264 }
28265 }
28266 }
28267
28268 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, small_kernel) {
28269 for (size_t k = 1; k <= 20; k += 5) {
28270 GemmMicrokernelTester()
28271 .mr(1)
28272 .nr(8)
28273 .kr(1)
28274 .sr(4)
28275 .m(1)
28276 .n(8)
28277 .k(k)
28278 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028279 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028280 }
28281 }
28282
28283 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, small_kernel_subtile) {
28284 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028285 for (uint32_t n = 1; n <= 8; n++) {
28286 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028287 GemmMicrokernelTester()
28288 .mr(1)
28289 .nr(8)
28290 .kr(1)
28291 .sr(4)
28292 .m(m)
28293 .n(n)
28294 .k(k)
28295 .ks(3)
28296 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028297 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028298 }
28299 }
28300 }
28301 }
28302
28303 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_gt_8_small_kernel) {
28304 for (uint32_t n = 9; n < 16; n++) {
28305 for (size_t k = 1; k <= 20; k += 5) {
28306 GemmMicrokernelTester()
28307 .mr(1)
28308 .nr(8)
28309 .kr(1)
28310 .sr(4)
28311 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028312 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028313 .k(k)
28314 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028315 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028316 }
28317 }
28318 }
28319
28320 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, n_div_8_small_kernel) {
28321 for (uint32_t n = 16; n <= 24; n += 8) {
28322 for (size_t k = 1; k <= 20; k += 5) {
28323 GemmMicrokernelTester()
28324 .mr(1)
28325 .nr(8)
28326 .kr(1)
28327 .sr(4)
28328 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028329 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028330 .k(k)
28331 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028332 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028333 }
28334 }
28335 }
28336
28337 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, strided_cm_subtile) {
28338 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028339 for (uint32_t n = 1; n <= 8; n++) {
28340 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028341 GemmMicrokernelTester()
28342 .mr(1)
28343 .nr(8)
28344 .kr(1)
28345 .sr(4)
28346 .m(m)
28347 .n(n)
28348 .k(k)
28349 .cm_stride(11)
28350 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028351 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028352 }
28353 }
28354 }
28355 }
28356
28357 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, a_offset) {
28358 for (size_t k = 1; k <= 20; k += 5) {
28359 GemmMicrokernelTester()
28360 .mr(1)
28361 .nr(8)
28362 .kr(1)
28363 .sr(4)
28364 .m(1)
28365 .n(8)
28366 .k(k)
28367 .ks(3)
28368 .a_offset(23)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028369 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028370 }
28371 }
28372
28373 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028374 for (size_t k = 1; k <= 20; k += 5) {
28375 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028376 GemmMicrokernelTester()
28377 .mr(1)
28378 .nr(8)
28379 .kr(1)
28380 .sr(4)
28381 .m(1)
28382 .n(8)
28383 .k(k)
28384 .ks(3)
28385 .a_offset(23)
28386 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028387 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028388 }
28389 }
28390 }
28391
28392 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, qmin) {
28393 GemmMicrokernelTester()
28394 .mr(1)
28395 .nr(8)
28396 .kr(1)
28397 .sr(4)
28398 .m(1)
28399 .n(8)
28400 .k(4)
28401 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028402 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028403 }
28404
28405 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, qmax) {
28406 GemmMicrokernelTester()
28407 .mr(1)
28408 .nr(8)
28409 .kr(1)
28410 .sr(4)
28411 .m(1)
28412 .n(8)
28413 .k(4)
28414 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028415 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028416 }
28417
28418 TEST(F32_IGEMM_MINMAX_1X8S4__WASMSIMD_X86, strided_cm) {
28419 GemmMicrokernelTester()
28420 .mr(1)
28421 .nr(8)
28422 .kr(1)
28423 .sr(4)
28424 .m(1)
28425 .n(8)
28426 .k(4)
28427 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028428 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028429 }
Marat Dukhan4c617792021-12-21 15:47:58 -080028430#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028431
28432
Marat Dukhan4c617792021-12-21 15:47:58 -080028433#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028434 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4) {
28435 GemmMicrokernelTester()
28436 .mr(4)
28437 .nr(8)
28438 .kr(1)
28439 .sr(4)
28440 .m(4)
28441 .n(8)
28442 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028443 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028444 }
28445
28446 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, strided_cn) {
28447 GemmMicrokernelTester()
28448 .mr(4)
28449 .nr(8)
28450 .kr(1)
28451 .sr(4)
28452 .m(4)
28453 .n(8)
28454 .k(4)
28455 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028456 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028457 }
28458
28459 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028460 for (uint32_t n = 1; n <= 8; n++) {
28461 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028462 GemmMicrokernelTester()
28463 .mr(4)
28464 .nr(8)
28465 .kr(1)
28466 .sr(4)
28467 .m(m)
28468 .n(n)
28469 .k(4)
28470 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028471 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028472 }
28473 }
28474 }
28475
28476 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
28477 for (uint32_t m = 1; m <= 4; m++) {
28478 GemmMicrokernelTester()
28479 .mr(4)
28480 .nr(8)
28481 .kr(1)
28482 .sr(4)
28483 .m(m)
28484 .n(8)
28485 .k(4)
28486 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028487 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028488 }
28489 }
28490
28491 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
28492 for (uint32_t n = 1; n <= 8; n++) {
28493 GemmMicrokernelTester()
28494 .mr(4)
28495 .nr(8)
28496 .kr(1)
28497 .sr(4)
28498 .m(4)
28499 .n(n)
28500 .k(4)
28501 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028502 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028503 }
28504 }
28505
28506 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_lt_4) {
28507 for (size_t k = 1; k < 4; k++) {
28508 GemmMicrokernelTester()
28509 .mr(4)
28510 .nr(8)
28511 .kr(1)
28512 .sr(4)
28513 .m(4)
28514 .n(8)
28515 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028516 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028517 }
28518 }
28519
28520 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_lt_4_subtile) {
28521 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028522 for (uint32_t n = 1; n <= 8; n++) {
28523 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028524 GemmMicrokernelTester()
28525 .mr(4)
28526 .nr(8)
28527 .kr(1)
28528 .sr(4)
28529 .m(m)
28530 .n(n)
28531 .k(k)
28532 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028533 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028534 }
28535 }
28536 }
28537 }
28538
28539 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_gt_4) {
28540 for (size_t k = 5; k < 8; k++) {
28541 GemmMicrokernelTester()
28542 .mr(4)
28543 .nr(8)
28544 .kr(1)
28545 .sr(4)
28546 .m(4)
28547 .n(8)
28548 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028549 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028550 }
28551 }
28552
28553 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_gt_4_subtile) {
28554 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028555 for (uint32_t n = 1; n <= 8; n++) {
28556 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028557 GemmMicrokernelTester()
28558 .mr(4)
28559 .nr(8)
28560 .kr(1)
28561 .sr(4)
28562 .m(m)
28563 .n(n)
28564 .k(k)
28565 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028566 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028567 }
28568 }
28569 }
28570 }
28571
28572 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_div_4) {
28573 for (size_t k = 8; k <= 40; k += 4) {
28574 GemmMicrokernelTester()
28575 .mr(4)
28576 .nr(8)
28577 .kr(1)
28578 .sr(4)
28579 .m(4)
28580 .n(8)
28581 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028582 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028583 }
28584 }
28585
28586 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, k_div_4_subtile) {
28587 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028588 for (uint32_t n = 1; n <= 8; n++) {
28589 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028590 GemmMicrokernelTester()
28591 .mr(4)
28592 .nr(8)
28593 .kr(1)
28594 .sr(4)
28595 .m(m)
28596 .n(n)
28597 .k(k)
28598 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028599 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028600 }
28601 }
28602 }
28603 }
28604
28605 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8) {
28606 for (uint32_t n = 9; n < 16; n++) {
28607 for (size_t k = 1; k <= 20; k += 5) {
28608 GemmMicrokernelTester()
28609 .mr(4)
28610 .nr(8)
28611 .kr(1)
28612 .sr(4)
28613 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028614 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028615 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028616 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028617 }
28618 }
28619 }
28620
28621 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
28622 for (uint32_t n = 9; n < 16; n++) {
28623 for (size_t k = 1; k <= 20; k += 5) {
28624 GemmMicrokernelTester()
28625 .mr(4)
28626 .nr(8)
28627 .kr(1)
28628 .sr(4)
28629 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028630 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028631 .k(k)
28632 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028633 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028634 }
28635 }
28636 }
28637
28638 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_subtile) {
28639 for (uint32_t n = 9; n < 16; n++) {
28640 for (size_t k = 1; k <= 20; k += 5) {
28641 for (uint32_t m = 1; m <= 4; m++) {
28642 GemmMicrokernelTester()
28643 .mr(4)
28644 .nr(8)
28645 .kr(1)
28646 .sr(4)
28647 .m(m)
28648 .n(n)
28649 .k(k)
28650 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028651 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028652 }
28653 }
28654 }
28655 }
28656
28657 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_div_8) {
28658 for (uint32_t n = 16; n <= 24; n += 8) {
28659 for (size_t k = 1; k <= 20; k += 5) {
28660 GemmMicrokernelTester()
28661 .mr(4)
28662 .nr(8)
28663 .kr(1)
28664 .sr(4)
28665 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028666 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028667 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028668 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028669 }
28670 }
28671 }
28672
28673 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
28674 for (uint32_t n = 16; n <= 24; n += 8) {
28675 for (size_t k = 1; k <= 20; k += 5) {
28676 GemmMicrokernelTester()
28677 .mr(4)
28678 .nr(8)
28679 .kr(1)
28680 .sr(4)
28681 .m(4)
28682 .n(n)
28683 .k(k)
28684 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028685 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028686 }
28687 }
28688 }
28689
28690 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_subtile) {
28691 for (uint32_t n = 16; n <= 24; n += 8) {
28692 for (size_t k = 1; k <= 20; k += 5) {
28693 for (uint32_t m = 1; m <= 4; m++) {
28694 GemmMicrokernelTester()
28695 .mr(4)
28696 .nr(8)
28697 .kr(1)
28698 .sr(4)
28699 .m(m)
28700 .n(n)
28701 .k(k)
28702 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028703 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028704 }
28705 }
28706 }
28707 }
28708
28709 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, small_kernel) {
28710 for (size_t k = 1; k <= 20; k += 5) {
28711 GemmMicrokernelTester()
28712 .mr(4)
28713 .nr(8)
28714 .kr(1)
28715 .sr(4)
28716 .m(4)
28717 .n(8)
28718 .k(k)
28719 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028720 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028721 }
28722 }
28723
28724 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, small_kernel_subtile) {
28725 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028726 for (uint32_t n = 1; n <= 8; n++) {
28727 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028728 GemmMicrokernelTester()
28729 .mr(4)
28730 .nr(8)
28731 .kr(1)
28732 .sr(4)
28733 .m(m)
28734 .n(n)
28735 .k(k)
28736 .ks(3)
28737 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028738 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028739 }
28740 }
28741 }
28742 }
28743
28744 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_gt_8_small_kernel) {
28745 for (uint32_t n = 9; n < 16; n++) {
28746 for (size_t k = 1; k <= 20; k += 5) {
28747 GemmMicrokernelTester()
28748 .mr(4)
28749 .nr(8)
28750 .kr(1)
28751 .sr(4)
28752 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028753 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028754 .k(k)
28755 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028756 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028757 }
28758 }
28759 }
28760
28761 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, n_div_8_small_kernel) {
28762 for (uint32_t n = 16; n <= 24; n += 8) {
28763 for (size_t k = 1; k <= 20; k += 5) {
28764 GemmMicrokernelTester()
28765 .mr(4)
28766 .nr(8)
28767 .kr(1)
28768 .sr(4)
28769 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028770 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028771 .k(k)
28772 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028773 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028774 }
28775 }
28776 }
28777
28778 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, strided_cm_subtile) {
28779 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028780 for (uint32_t n = 1; n <= 8; n++) {
28781 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028782 GemmMicrokernelTester()
28783 .mr(4)
28784 .nr(8)
28785 .kr(1)
28786 .sr(4)
28787 .m(m)
28788 .n(n)
28789 .k(k)
28790 .cm_stride(11)
28791 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028792 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028793 }
28794 }
28795 }
28796 }
28797
28798 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, a_offset) {
28799 for (size_t k = 1; k <= 20; k += 5) {
28800 GemmMicrokernelTester()
28801 .mr(4)
28802 .nr(8)
28803 .kr(1)
28804 .sr(4)
28805 .m(4)
28806 .n(8)
28807 .k(k)
28808 .ks(3)
28809 .a_offset(83)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028810 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028811 }
28812 }
28813
28814 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028815 for (size_t k = 1; k <= 20; k += 5) {
28816 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028817 GemmMicrokernelTester()
28818 .mr(4)
28819 .nr(8)
28820 .kr(1)
28821 .sr(4)
28822 .m(4)
28823 .n(8)
28824 .k(k)
28825 .ks(3)
28826 .a_offset(83)
28827 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028828 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028829 }
28830 }
28831 }
28832
28833 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, qmin) {
28834 GemmMicrokernelTester()
28835 .mr(4)
28836 .nr(8)
28837 .kr(1)
28838 .sr(4)
28839 .m(4)
28840 .n(8)
28841 .k(4)
28842 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028843 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028844 }
28845
28846 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, qmax) {
28847 GemmMicrokernelTester()
28848 .mr(4)
28849 .nr(8)
28850 .kr(1)
28851 .sr(4)
28852 .m(4)
28853 .n(8)
28854 .k(4)
28855 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028856 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028857 }
28858
28859 TEST(F32_IGEMM_MINMAX_4X8S4__WASMSIMD_X86, strided_cm) {
28860 GemmMicrokernelTester()
28861 .mr(4)
28862 .nr(8)
28863 .kr(1)
28864 .sr(4)
28865 .m(4)
28866 .n(8)
28867 .k(4)
28868 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028869 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028870 }
Marat Dukhan4c617792021-12-21 15:47:58 -080028871#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028872
28873
Marat Dukhan4c617792021-12-21 15:47:58 -080028874#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028875 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4) {
28876 GemmMicrokernelTester()
28877 .mr(5)
28878 .nr(8)
28879 .kr(1)
28880 .sr(4)
28881 .m(5)
28882 .n(8)
28883 .k(4)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028884 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028885 }
28886
28887 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, strided_cn) {
28888 GemmMicrokernelTester()
28889 .mr(5)
28890 .nr(8)
28891 .kr(1)
28892 .sr(4)
28893 .m(5)
28894 .n(8)
28895 .k(4)
28896 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028897 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028898 }
28899
28900 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028901 for (uint32_t n = 1; n <= 8; n++) {
28902 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028903 GemmMicrokernelTester()
28904 .mr(5)
28905 .nr(8)
28906 .kr(1)
28907 .sr(4)
28908 .m(m)
28909 .n(n)
28910 .k(4)
28911 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028912 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028913 }
28914 }
28915 }
28916
28917 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
28918 for (uint32_t m = 1; m <= 5; m++) {
28919 GemmMicrokernelTester()
28920 .mr(5)
28921 .nr(8)
28922 .kr(1)
28923 .sr(4)
28924 .m(m)
28925 .n(8)
28926 .k(4)
28927 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028928 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028929 }
28930 }
28931
28932 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
28933 for (uint32_t n = 1; n <= 8; n++) {
28934 GemmMicrokernelTester()
28935 .mr(5)
28936 .nr(8)
28937 .kr(1)
28938 .sr(4)
28939 .m(5)
28940 .n(n)
28941 .k(4)
28942 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028943 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028944 }
28945 }
28946
28947 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_lt_4) {
28948 for (size_t k = 1; k < 4; k++) {
28949 GemmMicrokernelTester()
28950 .mr(5)
28951 .nr(8)
28952 .kr(1)
28953 .sr(4)
28954 .m(5)
28955 .n(8)
28956 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028957 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028958 }
28959 }
28960
28961 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_lt_4_subtile) {
28962 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028963 for (uint32_t n = 1; n <= 8; n++) {
28964 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028965 GemmMicrokernelTester()
28966 .mr(5)
28967 .nr(8)
28968 .kr(1)
28969 .sr(4)
28970 .m(m)
28971 .n(n)
28972 .k(k)
28973 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028974 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028975 }
28976 }
28977 }
28978 }
28979
28980 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_gt_4) {
28981 for (size_t k = 5; k < 8; k++) {
28982 GemmMicrokernelTester()
28983 .mr(5)
28984 .nr(8)
28985 .kr(1)
28986 .sr(4)
28987 .m(5)
28988 .n(8)
28989 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080028990 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028991 }
28992 }
28993
28994 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_gt_4_subtile) {
28995 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028996 for (uint32_t n = 1; n <= 8; n++) {
28997 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070028998 GemmMicrokernelTester()
28999 .mr(5)
29000 .nr(8)
29001 .kr(1)
29002 .sr(4)
29003 .m(m)
29004 .n(n)
29005 .k(k)
29006 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029007 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029008 }
29009 }
29010 }
29011 }
29012
29013 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_div_4) {
29014 for (size_t k = 8; k <= 40; k += 4) {
29015 GemmMicrokernelTester()
29016 .mr(5)
29017 .nr(8)
29018 .kr(1)
29019 .sr(4)
29020 .m(5)
29021 .n(8)
29022 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029023 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029024 }
29025 }
29026
29027 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, k_div_4_subtile) {
29028 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029029 for (uint32_t n = 1; n <= 8; n++) {
29030 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029031 GemmMicrokernelTester()
29032 .mr(5)
29033 .nr(8)
29034 .kr(1)
29035 .sr(4)
29036 .m(m)
29037 .n(n)
29038 .k(k)
29039 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029040 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029041 }
29042 }
29043 }
29044 }
29045
29046 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8) {
29047 for (uint32_t n = 9; n < 16; n++) {
29048 for (size_t k = 1; k <= 20; k += 5) {
29049 GemmMicrokernelTester()
29050 .mr(5)
29051 .nr(8)
29052 .kr(1)
29053 .sr(4)
29054 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029055 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029056 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029057 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029058 }
29059 }
29060 }
29061
29062 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
29063 for (uint32_t n = 9; n < 16; n++) {
29064 for (size_t k = 1; k <= 20; k += 5) {
29065 GemmMicrokernelTester()
29066 .mr(5)
29067 .nr(8)
29068 .kr(1)
29069 .sr(4)
29070 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029071 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029072 .k(k)
29073 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029074 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029075 }
29076 }
29077 }
29078
29079 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_subtile) {
29080 for (uint32_t n = 9; n < 16; n++) {
29081 for (size_t k = 1; k <= 20; k += 5) {
29082 for (uint32_t m = 1; m <= 5; m++) {
29083 GemmMicrokernelTester()
29084 .mr(5)
29085 .nr(8)
29086 .kr(1)
29087 .sr(4)
29088 .m(m)
29089 .n(n)
29090 .k(k)
29091 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029092 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029093 }
29094 }
29095 }
29096 }
29097
29098 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_div_8) {
29099 for (uint32_t n = 16; n <= 24; n += 8) {
29100 for (size_t k = 1; k <= 20; k += 5) {
29101 GemmMicrokernelTester()
29102 .mr(5)
29103 .nr(8)
29104 .kr(1)
29105 .sr(4)
29106 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029107 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029108 .k(k)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029109 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029110 }
29111 }
29112 }
29113
29114 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
29115 for (uint32_t n = 16; n <= 24; n += 8) {
29116 for (size_t k = 1; k <= 20; k += 5) {
29117 GemmMicrokernelTester()
29118 .mr(5)
29119 .nr(8)
29120 .kr(1)
29121 .sr(4)
29122 .m(5)
29123 .n(n)
29124 .k(k)
29125 .cn_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029126 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029127 }
29128 }
29129 }
29130
29131 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_subtile) {
29132 for (uint32_t n = 16; n <= 24; n += 8) {
29133 for (size_t k = 1; k <= 20; k += 5) {
29134 for (uint32_t m = 1; m <= 5; m++) {
29135 GemmMicrokernelTester()
29136 .mr(5)
29137 .nr(8)
29138 .kr(1)
29139 .sr(4)
29140 .m(m)
29141 .n(n)
29142 .k(k)
29143 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029144 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029145 }
29146 }
29147 }
29148 }
29149
29150 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, small_kernel) {
29151 for (size_t k = 1; k <= 20; k += 5) {
29152 GemmMicrokernelTester()
29153 .mr(5)
29154 .nr(8)
29155 .kr(1)
29156 .sr(4)
29157 .m(5)
29158 .n(8)
29159 .k(k)
29160 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029161 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029162 }
29163 }
29164
29165 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, small_kernel_subtile) {
29166 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029167 for (uint32_t n = 1; n <= 8; n++) {
29168 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029169 GemmMicrokernelTester()
29170 .mr(5)
29171 .nr(8)
29172 .kr(1)
29173 .sr(4)
29174 .m(m)
29175 .n(n)
29176 .k(k)
29177 .ks(3)
29178 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029179 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029180 }
29181 }
29182 }
29183 }
29184
29185 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_gt_8_small_kernel) {
29186 for (uint32_t n = 9; n < 16; n++) {
29187 for (size_t k = 1; k <= 20; k += 5) {
29188 GemmMicrokernelTester()
29189 .mr(5)
29190 .nr(8)
29191 .kr(1)
29192 .sr(4)
29193 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029194 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029195 .k(k)
29196 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029197 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029198 }
29199 }
29200 }
29201
29202 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, n_div_8_small_kernel) {
29203 for (uint32_t n = 16; n <= 24; n += 8) {
29204 for (size_t k = 1; k <= 20; k += 5) {
29205 GemmMicrokernelTester()
29206 .mr(5)
29207 .nr(8)
29208 .kr(1)
29209 .sr(4)
29210 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029211 .n(n)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029212 .k(k)
29213 .ks(3)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029214 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029215 }
29216 }
29217 }
29218
29219 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, strided_cm_subtile) {
29220 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029221 for (uint32_t n = 1; n <= 8; n++) {
29222 for (uint32_t m = 1; m <= 5; m++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029223 GemmMicrokernelTester()
29224 .mr(5)
29225 .nr(8)
29226 .kr(1)
29227 .sr(4)
29228 .m(m)
29229 .n(n)
29230 .k(k)
29231 .cm_stride(11)
29232 .iterations(1)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029233 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029234 }
29235 }
29236 }
29237 }
29238
29239 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, a_offset) {
29240 for (size_t k = 1; k <= 20; k += 5) {
29241 GemmMicrokernelTester()
29242 .mr(5)
29243 .nr(8)
29244 .kr(1)
29245 .sr(4)
29246 .m(5)
29247 .n(8)
29248 .k(k)
29249 .ks(3)
29250 .a_offset(103)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029251 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029252 }
29253 }
29254
29255 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029256 for (size_t k = 1; k <= 20; k += 5) {
29257 for (uint32_t mz = 0; mz < 5; mz++) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029258 GemmMicrokernelTester()
29259 .mr(5)
29260 .nr(8)
29261 .kr(1)
29262 .sr(4)
29263 .m(5)
29264 .n(8)
29265 .k(k)
29266 .ks(3)
29267 .a_offset(103)
29268 .zero_index(mz)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029269 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029270 }
29271 }
29272 }
29273
29274 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, qmin) {
29275 GemmMicrokernelTester()
29276 .mr(5)
29277 .nr(8)
29278 .kr(1)
29279 .sr(4)
29280 .m(5)
29281 .n(8)
29282 .k(4)
29283 .qmin(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029284 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029285 }
29286
29287 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, qmax) {
29288 GemmMicrokernelTester()
29289 .mr(5)
29290 .nr(8)
29291 .kr(1)
29292 .sr(4)
29293 .m(5)
29294 .n(8)
29295 .k(4)
29296 .qmax(128)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029297 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029298 }
29299
29300 TEST(F32_IGEMM_MINMAX_5X8S4__WASMSIMD_X86, strided_cm) {
29301 GemmMicrokernelTester()
29302 .mr(5)
29303 .nr(8)
29304 .kr(1)
29305 .sr(4)
29306 .m(5)
29307 .n(8)
29308 .k(4)
29309 .cm_stride(11)
Marat Dukhanc83ef3b2021-12-30 09:47:07 -080029310 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029311 }
Marat Dukhan4c617792021-12-21 15:47:58 -080029312#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1bbf96b2020-06-15 23:01:20 -070029313
29314
Marat Dukhan4c617792021-12-21 15:47:58 -080029315#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhande06f492020-04-09 00:19:31 -070029316 TEST(F32_IGEMM_MINMAX_1X4__WASM, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029317 GemmMicrokernelTester()
29318 .mr(1)
29319 .nr(4)
29320 .kr(1)
29321 .sr(1)
29322 .m(1)
29323 .n(4)
29324 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029325 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029326 }
29327
Marat Dukhande06f492020-04-09 00:19:31 -070029328 TEST(F32_IGEMM_MINMAX_1X4__WASM, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029329 GemmMicrokernelTester()
29330 .mr(1)
29331 .nr(4)
29332 .kr(1)
29333 .sr(1)
29334 .m(1)
29335 .n(4)
29336 .k(1)
29337 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029338 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029339 }
29340
Marat Dukhande06f492020-04-09 00:19:31 -070029341 TEST(F32_IGEMM_MINMAX_1X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029342 for (uint32_t n = 1; n <= 4; n++) {
29343 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029344 GemmMicrokernelTester()
29345 .mr(1)
29346 .nr(4)
29347 .kr(1)
29348 .sr(1)
29349 .m(m)
29350 .n(n)
29351 .k(1)
29352 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029353 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029354 }
29355 }
29356 }
29357
Marat Dukhande06f492020-04-09 00:19:31 -070029358 TEST(F32_IGEMM_MINMAX_1X4__WASM, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029359 for (uint32_t m = 1; m <= 1; m++) {
29360 GemmMicrokernelTester()
29361 .mr(1)
29362 .nr(4)
29363 .kr(1)
29364 .sr(1)
29365 .m(m)
29366 .n(4)
29367 .k(1)
29368 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029369 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029370 }
29371 }
29372
Marat Dukhande06f492020-04-09 00:19:31 -070029373 TEST(F32_IGEMM_MINMAX_1X4__WASM, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029374 for (uint32_t n = 1; n <= 4; n++) {
29375 GemmMicrokernelTester()
29376 .mr(1)
29377 .nr(4)
29378 .kr(1)
29379 .sr(1)
29380 .m(1)
29381 .n(n)
29382 .k(1)
29383 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029384 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029385 }
29386 }
29387
Marat Dukhande06f492020-04-09 00:19:31 -070029388 TEST(F32_IGEMM_MINMAX_1X4__WASM, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029389 for (size_t k = 2; k < 10; k++) {
29390 GemmMicrokernelTester()
29391 .mr(1)
29392 .nr(4)
29393 .kr(1)
29394 .sr(1)
29395 .m(1)
29396 .n(4)
29397 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029398 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029399 }
29400 }
29401
Marat Dukhande06f492020-04-09 00:19:31 -070029402 TEST(F32_IGEMM_MINMAX_1X4__WASM, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029403 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029404 for (uint32_t n = 1; n <= 4; n++) {
29405 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029406 GemmMicrokernelTester()
29407 .mr(1)
29408 .nr(4)
29409 .kr(1)
29410 .sr(1)
29411 .m(m)
29412 .n(n)
29413 .k(k)
29414 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029415 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029416 }
29417 }
29418 }
29419 }
29420
Marat Dukhande06f492020-04-09 00:19:31 -070029421 TEST(F32_IGEMM_MINMAX_1X4__WASM, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029422 for (uint32_t n = 5; n < 8; n++) {
29423 for (size_t k = 1; k <= 5; k += 2) {
29424 GemmMicrokernelTester()
29425 .mr(1)
29426 .nr(4)
29427 .kr(1)
29428 .sr(1)
29429 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029430 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070029431 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029432 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029433 }
29434 }
29435 }
29436
Marat Dukhande06f492020-04-09 00:19:31 -070029437 TEST(F32_IGEMM_MINMAX_1X4__WASM, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029438 for (uint32_t n = 5; n < 8; n++) {
29439 for (size_t k = 1; k <= 5; k += 2) {
29440 GemmMicrokernelTester()
29441 .mr(1)
29442 .nr(4)
29443 .kr(1)
29444 .sr(1)
29445 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029446 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070029447 .k(k)
29448 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029449 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029450 }
29451 }
29452 }
29453
Marat Dukhande06f492020-04-09 00:19:31 -070029454 TEST(F32_IGEMM_MINMAX_1X4__WASM, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029455 for (uint32_t n = 5; n < 8; n++) {
29456 for (size_t k = 1; k <= 5; k += 2) {
29457 for (uint32_t m = 1; m <= 1; m++) {
29458 GemmMicrokernelTester()
29459 .mr(1)
29460 .nr(4)
29461 .kr(1)
29462 .sr(1)
29463 .m(m)
29464 .n(n)
29465 .k(k)
29466 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029467 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029468 }
29469 }
29470 }
29471 }
29472
Marat Dukhande06f492020-04-09 00:19:31 -070029473 TEST(F32_IGEMM_MINMAX_1X4__WASM, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029474 for (uint32_t n = 8; n <= 12; n += 4) {
29475 for (size_t k = 1; k <= 5; k += 2) {
29476 GemmMicrokernelTester()
29477 .mr(1)
29478 .nr(4)
29479 .kr(1)
29480 .sr(1)
29481 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029482 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070029483 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029484 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029485 }
29486 }
29487 }
29488
Marat Dukhande06f492020-04-09 00:19:31 -070029489 TEST(F32_IGEMM_MINMAX_1X4__WASM, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029490 for (uint32_t n = 8; n <= 12; n += 4) {
29491 for (size_t k = 1; k <= 5; k += 2) {
29492 GemmMicrokernelTester()
29493 .mr(1)
29494 .nr(4)
29495 .kr(1)
29496 .sr(1)
29497 .m(1)
29498 .n(n)
29499 .k(k)
29500 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029501 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029502 }
29503 }
29504 }
29505
Marat Dukhande06f492020-04-09 00:19:31 -070029506 TEST(F32_IGEMM_MINMAX_1X4__WASM, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029507 for (uint32_t n = 8; n <= 12; n += 4) {
29508 for (size_t k = 1; k <= 5; k += 2) {
29509 for (uint32_t m = 1; m <= 1; m++) {
29510 GemmMicrokernelTester()
29511 .mr(1)
29512 .nr(4)
29513 .kr(1)
29514 .sr(1)
29515 .m(m)
29516 .n(n)
29517 .k(k)
29518 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029519 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029520 }
29521 }
29522 }
29523 }
29524
Marat Dukhande06f492020-04-09 00:19:31 -070029525 TEST(F32_IGEMM_MINMAX_1X4__WASM, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029526 for (size_t k = 1; k <= 5; k += 2) {
29527 GemmMicrokernelTester()
29528 .mr(1)
29529 .nr(4)
29530 .kr(1)
29531 .sr(1)
29532 .m(1)
29533 .n(4)
29534 .k(k)
29535 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029536 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029537 }
29538 }
29539
Marat Dukhande06f492020-04-09 00:19:31 -070029540 TEST(F32_IGEMM_MINMAX_1X4__WASM, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029541 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029542 for (uint32_t n = 1; n <= 4; n++) {
29543 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029544 GemmMicrokernelTester()
29545 .mr(1)
29546 .nr(4)
29547 .kr(1)
29548 .sr(1)
29549 .m(m)
29550 .n(n)
29551 .k(k)
29552 .ks(3)
29553 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029554 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029555 }
29556 }
29557 }
29558 }
29559
Marat Dukhande06f492020-04-09 00:19:31 -070029560 TEST(F32_IGEMM_MINMAX_1X4__WASM, n_gt_4_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029561 for (uint32_t n = 5; n < 8; n++) {
29562 for (size_t k = 1; k <= 5; k += 2) {
29563 GemmMicrokernelTester()
29564 .mr(1)
29565 .nr(4)
29566 .kr(1)
29567 .sr(1)
29568 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029569 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070029570 .k(k)
29571 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029572 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029573 }
29574 }
29575 }
29576
Marat Dukhande06f492020-04-09 00:19:31 -070029577 TEST(F32_IGEMM_MINMAX_1X4__WASM, n_div_4_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029578 for (uint32_t n = 8; n <= 12; n += 4) {
29579 for (size_t k = 1; k <= 5; k += 2) {
29580 GemmMicrokernelTester()
29581 .mr(1)
29582 .nr(4)
29583 .kr(1)
29584 .sr(1)
29585 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029586 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070029587 .k(k)
29588 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029589 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029590 }
29591 }
29592 }
29593
Marat Dukhande06f492020-04-09 00:19:31 -070029594 TEST(F32_IGEMM_MINMAX_1X4__WASM, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029595 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029596 for (uint32_t n = 1; n <= 4; n++) {
29597 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029598 GemmMicrokernelTester()
29599 .mr(1)
29600 .nr(4)
29601 .kr(1)
29602 .sr(1)
29603 .m(m)
29604 .n(n)
29605 .k(k)
29606 .cm_stride(7)
29607 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029608 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029609 }
29610 }
29611 }
29612 }
29613
Marat Dukhande06f492020-04-09 00:19:31 -070029614 TEST(F32_IGEMM_MINMAX_1X4__WASM, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029615 for (size_t k = 1; k <= 5; k += 2) {
29616 GemmMicrokernelTester()
29617 .mr(1)
29618 .nr(4)
29619 .kr(1)
29620 .sr(1)
29621 .m(1)
29622 .n(4)
29623 .k(k)
29624 .ks(3)
29625 .a_offset(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029626 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029627 }
29628 }
29629
Marat Dukhande06f492020-04-09 00:19:31 -070029630 TEST(F32_IGEMM_MINMAX_1X4__WASM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029631 for (size_t k = 1; k <= 5; k += 2) {
29632 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029633 GemmMicrokernelTester()
29634 .mr(1)
29635 .nr(4)
29636 .kr(1)
29637 .sr(1)
29638 .m(1)
29639 .n(4)
29640 .k(k)
29641 .ks(3)
29642 .a_offset(7)
29643 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029644 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029645 }
29646 }
29647 }
29648
Marat Dukhande06f492020-04-09 00:19:31 -070029649 TEST(F32_IGEMM_MINMAX_1X4__WASM, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029650 GemmMicrokernelTester()
29651 .mr(1)
29652 .nr(4)
29653 .kr(1)
29654 .sr(1)
29655 .m(1)
29656 .n(4)
29657 .k(1)
29658 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029659 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029660 }
29661
Marat Dukhande06f492020-04-09 00:19:31 -070029662 TEST(F32_IGEMM_MINMAX_1X4__WASM, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029663 GemmMicrokernelTester()
29664 .mr(1)
29665 .nr(4)
29666 .kr(1)
29667 .sr(1)
29668 .m(1)
29669 .n(4)
29670 .k(1)
29671 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029672 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029673 }
29674
Marat Dukhande06f492020-04-09 00:19:31 -070029675 TEST(F32_IGEMM_MINMAX_1X4__WASM, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029676 GemmMicrokernelTester()
29677 .mr(1)
29678 .nr(4)
29679 .kr(1)
29680 .sr(1)
29681 .m(1)
29682 .n(4)
29683 .k(1)
29684 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029685 .Test(xnn_f32_igemm_minmax_ukernel_1x4__wasm, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029686 }
Marat Dukhan4c617792021-12-21 15:47:58 -080029687#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan1c587112020-04-08 20:04:28 -070029688
29689
Marat Dukhande06f492020-04-09 00:19:31 -070029690TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_eq_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029691 GemmMicrokernelTester()
29692 .mr(2)
29693 .nr(4)
29694 .kr(1)
29695 .sr(1)
29696 .m(2)
29697 .n(4)
29698 .k(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029699 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029700}
29701
Marat Dukhande06f492020-04-09 00:19:31 -070029702TEST(F32_IGEMM_MINMAX_2X4__SCALAR, strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029703 GemmMicrokernelTester()
29704 .mr(2)
29705 .nr(4)
29706 .kr(1)
29707 .sr(1)
29708 .m(2)
29709 .n(4)
29710 .k(1)
29711 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029712 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029713}
29714
Marat Dukhande06f492020-04-09 00:19:31 -070029715TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029716 for (uint32_t n = 1; n <= 4; n++) {
29717 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029718 GemmMicrokernelTester()
29719 .mr(2)
29720 .nr(4)
29721 .kr(1)
29722 .sr(1)
29723 .m(m)
29724 .n(n)
29725 .k(1)
29726 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029727 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029728 }
29729 }
29730}
29731
Marat Dukhande06f492020-04-09 00:19:31 -070029732TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_m) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029733 for (uint32_t m = 1; m <= 2; m++) {
29734 GemmMicrokernelTester()
29735 .mr(2)
29736 .nr(4)
29737 .kr(1)
29738 .sr(1)
29739 .m(m)
29740 .n(4)
29741 .k(1)
29742 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029743 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029744 }
29745}
29746
Marat Dukhande06f492020-04-09 00:19:31 -070029747TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_n) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029748 for (uint32_t n = 1; n <= 4; n++) {
29749 GemmMicrokernelTester()
29750 .mr(2)
29751 .nr(4)
29752 .kr(1)
29753 .sr(1)
29754 .m(2)
29755 .n(n)
29756 .k(1)
29757 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029758 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029759 }
29760}
29761
Marat Dukhande06f492020-04-09 00:19:31 -070029762TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_gt_1) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029763 for (size_t k = 2; k < 10; k++) {
29764 GemmMicrokernelTester()
29765 .mr(2)
29766 .nr(4)
29767 .kr(1)
29768 .sr(1)
29769 .m(2)
29770 .n(4)
29771 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029772 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029773 }
29774}
29775
Marat Dukhande06f492020-04-09 00:19:31 -070029776TEST(F32_IGEMM_MINMAX_2X4__SCALAR, k_gt_1_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029777 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029778 for (uint32_t n = 1; n <= 4; n++) {
29779 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029780 GemmMicrokernelTester()
29781 .mr(2)
29782 .nr(4)
29783 .kr(1)
29784 .sr(1)
29785 .m(m)
29786 .n(n)
29787 .k(k)
29788 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029789 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029790 }
29791 }
29792 }
29793}
29794
Marat Dukhande06f492020-04-09 00:19:31 -070029795TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_gt_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029796 for (uint32_t n = 5; n < 8; n++) {
29797 for (size_t k = 1; k <= 5; k += 2) {
29798 GemmMicrokernelTester()
29799 .mr(2)
29800 .nr(4)
29801 .kr(1)
29802 .sr(1)
29803 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029804 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070029805 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029806 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029807 }
29808 }
29809}
29810
Marat Dukhande06f492020-04-09 00:19:31 -070029811TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_gt_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029812 for (uint32_t n = 5; n < 8; n++) {
29813 for (size_t k = 1; k <= 5; k += 2) {
29814 GemmMicrokernelTester()
29815 .mr(2)
29816 .nr(4)
29817 .kr(1)
29818 .sr(1)
29819 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029820 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070029821 .k(k)
29822 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029823 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029824 }
29825 }
29826}
29827
Marat Dukhande06f492020-04-09 00:19:31 -070029828TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_gt_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029829 for (uint32_t n = 5; n < 8; n++) {
29830 for (size_t k = 1; k <= 5; k += 2) {
29831 for (uint32_t m = 1; m <= 2; m++) {
29832 GemmMicrokernelTester()
29833 .mr(2)
29834 .nr(4)
29835 .kr(1)
29836 .sr(1)
29837 .m(m)
29838 .n(n)
29839 .k(k)
29840 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029841 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029842 }
29843 }
29844 }
29845}
29846
Marat Dukhande06f492020-04-09 00:19:31 -070029847TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_div_4) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029848 for (uint32_t n = 8; n <= 12; n += 4) {
29849 for (size_t k = 1; k <= 5; k += 2) {
29850 GemmMicrokernelTester()
29851 .mr(2)
29852 .nr(4)
29853 .kr(1)
29854 .sr(1)
29855 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029856 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070029857 .k(k)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029858 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029859 }
29860 }
29861}
29862
Marat Dukhande06f492020-04-09 00:19:31 -070029863TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_div_4_strided_cn) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029864 for (uint32_t n = 8; n <= 12; n += 4) {
29865 for (size_t k = 1; k <= 5; k += 2) {
29866 GemmMicrokernelTester()
29867 .mr(2)
29868 .nr(4)
29869 .kr(1)
29870 .sr(1)
29871 .m(2)
29872 .n(n)
29873 .k(k)
29874 .cn_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029875 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029876 }
29877 }
29878}
29879
Marat Dukhande06f492020-04-09 00:19:31 -070029880TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_div_4_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029881 for (uint32_t n = 8; n <= 12; n += 4) {
29882 for (size_t k = 1; k <= 5; k += 2) {
29883 for (uint32_t m = 1; m <= 2; m++) {
29884 GemmMicrokernelTester()
29885 .mr(2)
29886 .nr(4)
29887 .kr(1)
29888 .sr(1)
29889 .m(m)
29890 .n(n)
29891 .k(k)
29892 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029893 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029894 }
29895 }
29896 }
29897}
29898
Marat Dukhande06f492020-04-09 00:19:31 -070029899TEST(F32_IGEMM_MINMAX_2X4__SCALAR, small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029900 for (size_t k = 1; k <= 5; k += 2) {
29901 GemmMicrokernelTester()
29902 .mr(2)
29903 .nr(4)
29904 .kr(1)
29905 .sr(1)
29906 .m(2)
29907 .n(4)
29908 .k(k)
29909 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029910 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029911 }
29912}
29913
Marat Dukhande06f492020-04-09 00:19:31 -070029914TEST(F32_IGEMM_MINMAX_2X4__SCALAR, small_kernel_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029915 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029916 for (uint32_t n = 1; n <= 4; n++) {
29917 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029918 GemmMicrokernelTester()
29919 .mr(2)
29920 .nr(4)
29921 .kr(1)
29922 .sr(1)
29923 .m(m)
29924 .n(n)
29925 .k(k)
29926 .ks(3)
29927 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029928 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029929 }
29930 }
29931 }
29932}
29933
Marat Dukhande06f492020-04-09 00:19:31 -070029934TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_gt_4_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029935 for (uint32_t n = 5; n < 8; n++) {
29936 for (size_t k = 1; k <= 5; k += 2) {
29937 GemmMicrokernelTester()
29938 .mr(2)
29939 .nr(4)
29940 .kr(1)
29941 .sr(1)
29942 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029943 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070029944 .k(k)
29945 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029946 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029947 }
29948 }
29949}
29950
Marat Dukhande06f492020-04-09 00:19:31 -070029951TEST(F32_IGEMM_MINMAX_2X4__SCALAR, n_div_4_small_kernel) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029952 for (uint32_t n = 8; n <= 12; n += 4) {
29953 for (size_t k = 1; k <= 5; k += 2) {
29954 GemmMicrokernelTester()
29955 .mr(2)
29956 .nr(4)
29957 .kr(1)
29958 .sr(1)
29959 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029960 .n(n)
Marat Dukhan1c587112020-04-08 20:04:28 -070029961 .k(k)
29962 .ks(3)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029963 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029964 }
29965 }
29966}
29967
Marat Dukhande06f492020-04-09 00:19:31 -070029968TEST(F32_IGEMM_MINMAX_2X4__SCALAR, strided_cm_subtile) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029969 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029970 for (uint32_t n = 1; n <= 4; n++) {
29971 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029972 GemmMicrokernelTester()
29973 .mr(2)
29974 .nr(4)
29975 .kr(1)
29976 .sr(1)
29977 .m(m)
29978 .n(n)
29979 .k(k)
29980 .cm_stride(7)
29981 .iterations(1)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029982 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070029983 }
29984 }
29985 }
29986}
29987
Marat Dukhande06f492020-04-09 00:19:31 -070029988TEST(F32_IGEMM_MINMAX_2X4__SCALAR, a_offset) {
Marat Dukhan1c587112020-04-08 20:04:28 -070029989 for (size_t k = 1; k <= 5; k += 2) {
29990 GemmMicrokernelTester()
29991 .mr(2)
29992 .nr(4)
29993 .kr(1)
29994 .sr(1)
29995 .m(2)
29996 .n(4)
29997 .k(k)
29998 .ks(3)
29999 .a_offset(13)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070030000 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070030001 }
30002}
30003
Marat Dukhande06f492020-04-09 00:19:31 -070030004TEST(F32_IGEMM_MINMAX_2X4__SCALAR, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030005 for (size_t k = 1; k <= 5; k += 2) {
30006 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan1c587112020-04-08 20:04:28 -070030007 GemmMicrokernelTester()
30008 .mr(2)
30009 .nr(4)
30010 .kr(1)
30011 .sr(1)
30012 .m(2)
30013 .n(4)
30014 .k(k)
30015 .ks(3)
30016 .a_offset(13)
30017 .zero_index(mz)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070030018 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070030019 }
30020 }
30021}
30022
Marat Dukhande06f492020-04-09 00:19:31 -070030023TEST(F32_IGEMM_MINMAX_2X4__SCALAR, qmin) {
Marat Dukhan1c587112020-04-08 20:04:28 -070030024 GemmMicrokernelTester()
30025 .mr(2)
30026 .nr(4)
30027 .kr(1)
30028 .sr(1)
30029 .m(2)
30030 .n(4)
30031 .k(1)
30032 .qmin(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070030033 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070030034}
30035
Marat Dukhande06f492020-04-09 00:19:31 -070030036TEST(F32_IGEMM_MINMAX_2X4__SCALAR, qmax) {
Marat Dukhan1c587112020-04-08 20:04:28 -070030037 GemmMicrokernelTester()
30038 .mr(2)
30039 .nr(4)
30040 .kr(1)
30041 .sr(1)
30042 .m(2)
30043 .n(4)
30044 .k(1)
30045 .qmax(128)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070030046 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070030047}
30048
Marat Dukhande06f492020-04-09 00:19:31 -070030049TEST(F32_IGEMM_MINMAX_2X4__SCALAR, strided_cm) {
Marat Dukhan1c587112020-04-08 20:04:28 -070030050 GemmMicrokernelTester()
30051 .mr(2)
30052 .nr(4)
30053 .kr(1)
30054 .sr(1)
30055 .m(2)
30056 .n(4)
30057 .k(1)
30058 .cm_stride(7)
Marat Dukhan104ae5e2021-05-24 13:41:57 -070030059 .Test(xnn_f32_igemm_minmax_ukernel_2x4__scalar, xnn_init_f32_minmax_scalar_params);
Marat Dukhan1c587112020-04-08 20:04:28 -070030060}
30061
30062
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030063#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
30064 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_eq_2) {
30065 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030066 GemmMicrokernelTester()
30067 .mr(4)
30068 .nr(8)
30069 .kr(1)
30070 .sr(1)
30071 .m(4)
30072 .n(8)
30073 .k(2)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030074 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030075 }
30076
30077 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, strided_cn) {
30078 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030079 GemmMicrokernelTester()
30080 .mr(4)
30081 .nr(8)
30082 .kr(1)
30083 .sr(1)
30084 .m(4)
30085 .n(8)
30086 .k(2)
30087 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030088 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030089 }
30090
30091 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_eq_2_subtile) {
30092 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080030093 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030094 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030095 GemmMicrokernelTester()
30096 .mr(4)
30097 .nr(8)
30098 .kr(1)
30099 .sr(1)
30100 .m(m)
30101 .n(n)
30102 .k(2)
30103 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030104 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030105 }
30106 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030107 }
30108
30109 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_eq_2_subtile_m) {
30110 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030111 for (uint32_t m = 1; m <= 4; m++) {
30112 GemmMicrokernelTester()
30113 .mr(4)
30114 .nr(8)
30115 .kr(1)
30116 .sr(1)
30117 .m(m)
30118 .n(8)
30119 .k(2)
30120 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030121 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030122 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030123 }
30124
30125 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_eq_2_subtile_n) {
30126 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030127 for (uint32_t n = 1; n <= 8; n++) {
30128 GemmMicrokernelTester()
30129 .mr(4)
30130 .nr(8)
30131 .kr(1)
30132 .sr(1)
30133 .m(4)
30134 .n(n)
30135 .k(2)
30136 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030137 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030138 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030139 }
30140
30141 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_lt_2) {
30142 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030143 for (size_t k = 1; k < 2; k++) {
30144 GemmMicrokernelTester()
30145 .mr(4)
30146 .nr(8)
30147 .kr(1)
30148 .sr(1)
30149 .m(4)
30150 .n(8)
30151 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030152 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030153 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030154 }
30155
30156 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_lt_2_subtile) {
30157 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030158 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030159 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030160 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030161 GemmMicrokernelTester()
30162 .mr(4)
30163 .nr(8)
30164 .kr(1)
30165 .sr(1)
30166 .m(m)
30167 .n(n)
30168 .k(k)
30169 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030170 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030171 }
30172 }
30173 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030174 }
30175
30176 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_gt_2) {
30177 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030178 for (size_t k = 3; k < 4; k++) {
30179 GemmMicrokernelTester()
30180 .mr(4)
30181 .nr(8)
30182 .kr(1)
30183 .sr(1)
30184 .m(4)
30185 .n(8)
30186 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030187 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030188 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030189 }
30190
30191 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_gt_2_subtile) {
30192 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030193 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030194 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030195 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030196 GemmMicrokernelTester()
30197 .mr(4)
30198 .nr(8)
30199 .kr(1)
30200 .sr(1)
30201 .m(m)
30202 .n(n)
30203 .k(k)
30204 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030205 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030206 }
30207 }
30208 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030209 }
30210
30211 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_div_2) {
30212 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030213 for (size_t k = 4; k <= 20; k += 2) {
30214 GemmMicrokernelTester()
30215 .mr(4)
30216 .nr(8)
30217 .kr(1)
30218 .sr(1)
30219 .m(4)
30220 .n(8)
30221 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030222 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030223 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030224 }
30225
30226 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, k_div_2_subtile) {
30227 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030228 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030229 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030230 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030231 GemmMicrokernelTester()
30232 .mr(4)
30233 .nr(8)
30234 .kr(1)
30235 .sr(1)
30236 .m(m)
30237 .n(n)
30238 .k(k)
30239 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030240 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030241 }
30242 }
30243 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030244 }
30245
30246 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_gt_8) {
30247 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030248 for (uint32_t n = 9; n < 16; n++) {
30249 for (size_t k = 1; k <= 10; k += 3) {
30250 GemmMicrokernelTester()
30251 .mr(4)
30252 .nr(8)
30253 .kr(1)
30254 .sr(1)
30255 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030256 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030257 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030258 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030259 }
30260 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030261 }
30262
30263 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_gt_8_strided_cn) {
30264 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030265 for (uint32_t n = 9; n < 16; n++) {
30266 for (size_t k = 1; k <= 10; k += 3) {
30267 GemmMicrokernelTester()
30268 .mr(4)
30269 .nr(8)
30270 .kr(1)
30271 .sr(1)
30272 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030273 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030274 .k(k)
30275 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030276 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030277 }
30278 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030279 }
30280
30281 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_gt_8_subtile) {
30282 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030283 for (uint32_t n = 9; n < 16; n++) {
30284 for (size_t k = 1; k <= 10; k += 3) {
30285 for (uint32_t m = 1; m <= 4; m++) {
30286 GemmMicrokernelTester()
30287 .mr(4)
30288 .nr(8)
30289 .kr(1)
30290 .sr(1)
30291 .m(m)
30292 .n(n)
30293 .k(k)
30294 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030295 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030296 }
30297 }
30298 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030299 }
30300
30301 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_div_8) {
30302 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030303 for (uint32_t n = 16; n <= 24; n += 8) {
30304 for (size_t k = 1; k <= 10; k += 3) {
30305 GemmMicrokernelTester()
30306 .mr(4)
30307 .nr(8)
30308 .kr(1)
30309 .sr(1)
30310 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030311 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030312 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030313 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030314 }
30315 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030316 }
30317
30318 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_div_8_strided_cn) {
30319 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030320 for (uint32_t n = 16; n <= 24; n += 8) {
30321 for (size_t k = 1; k <= 10; k += 3) {
30322 GemmMicrokernelTester()
30323 .mr(4)
30324 .nr(8)
30325 .kr(1)
30326 .sr(1)
30327 .m(4)
30328 .n(n)
30329 .k(k)
30330 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030331 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030332 }
30333 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030334 }
30335
30336 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_div_8_subtile) {
30337 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030338 for (uint32_t n = 16; n <= 24; n += 8) {
30339 for (size_t k = 1; k <= 10; k += 3) {
30340 for (uint32_t m = 1; m <= 4; m++) {
30341 GemmMicrokernelTester()
30342 .mr(4)
30343 .nr(8)
30344 .kr(1)
30345 .sr(1)
30346 .m(m)
30347 .n(n)
30348 .k(k)
30349 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030350 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030351 }
30352 }
30353 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030354 }
30355
30356 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, small_kernel) {
30357 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030358 for (size_t k = 1; k <= 10; k += 3) {
30359 GemmMicrokernelTester()
30360 .mr(4)
30361 .nr(8)
30362 .kr(1)
30363 .sr(1)
30364 .m(4)
30365 .n(8)
30366 .k(k)
30367 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030368 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030369 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030370 }
30371
30372 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, small_kernel_subtile) {
30373 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030374 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030375 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030376 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030377 GemmMicrokernelTester()
30378 .mr(4)
30379 .nr(8)
30380 .kr(1)
30381 .sr(1)
30382 .m(m)
30383 .n(n)
30384 .k(k)
30385 .ks(3)
30386 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030387 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030388 }
30389 }
30390 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030391 }
30392
30393 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_gt_8_small_kernel) {
30394 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030395 for (uint32_t n = 9; n < 16; n++) {
30396 for (size_t k = 1; k <= 10; k += 3) {
30397 GemmMicrokernelTester()
30398 .mr(4)
30399 .nr(8)
30400 .kr(1)
30401 .sr(1)
30402 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030403 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030404 .k(k)
30405 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030406 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030407 }
30408 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030409 }
30410
30411 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, n_div_8_small_kernel) {
30412 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030413 for (uint32_t n = 16; n <= 24; n += 8) {
30414 for (size_t k = 1; k <= 10; k += 3) {
30415 GemmMicrokernelTester()
30416 .mr(4)
30417 .nr(8)
30418 .kr(1)
30419 .sr(1)
30420 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030421 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030422 .k(k)
30423 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030424 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030425 }
30426 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030427 }
30428
30429 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, strided_cm_subtile) {
30430 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030431 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030432 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030433 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030434 GemmMicrokernelTester()
30435 .mr(4)
30436 .nr(8)
30437 .kr(1)
30438 .sr(1)
30439 .m(m)
30440 .n(n)
30441 .k(k)
30442 .cm_stride(11)
30443 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030444 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030445 }
30446 }
30447 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030448 }
30449
30450 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, a_offset) {
30451 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030452 for (size_t k = 1; k <= 10; k += 3) {
30453 GemmMicrokernelTester()
30454 .mr(4)
30455 .nr(8)
30456 .kr(1)
30457 .sr(1)
30458 .m(4)
30459 .n(8)
30460 .k(k)
30461 .ks(3)
30462 .a_offset(43)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030463 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030464 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030465 }
30466
30467 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, zero) {
30468 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080030469 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030470 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030471 GemmMicrokernelTester()
30472 .mr(4)
30473 .nr(8)
30474 .kr(1)
30475 .sr(1)
30476 .m(4)
30477 .n(8)
30478 .k(k)
30479 .ks(3)
30480 .a_offset(43)
30481 .zero_index(mz)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030482 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030483 }
30484 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030485 }
30486
30487 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, qmin) {
30488 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030489 GemmMicrokernelTester()
30490 .mr(4)
30491 .nr(8)
30492 .kr(1)
30493 .sr(1)
30494 .m(4)
30495 .n(8)
30496 .k(2)
30497 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030498 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030499 }
30500
30501 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, qmax) {
30502 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030503 GemmMicrokernelTester()
30504 .mr(4)
30505 .nr(8)
30506 .kr(1)
30507 .sr(1)
30508 .m(4)
30509 .n(8)
30510 .k(2)
30511 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030512 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030513 }
30514
30515 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_LD64, strided_cm) {
30516 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030517 GemmMicrokernelTester()
30518 .mr(4)
30519 .nr(8)
30520 .kr(1)
30521 .sr(1)
30522 .m(4)
30523 .n(8)
30524 .k(2)
30525 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030526 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030527 }
30528#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
30529
30530
30531#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
30532 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2) {
30533 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030534 GemmMicrokernelTester()
30535 .mr(4)
30536 .nr(8)
30537 .kr(1)
30538 .sr(1)
30539 .m(4)
30540 .n(8)
30541 .k(2)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030542 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030543 }
30544
30545 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, strided_cn) {
30546 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030547 GemmMicrokernelTester()
30548 .mr(4)
30549 .nr(8)
30550 .kr(1)
30551 .sr(1)
30552 .m(4)
30553 .n(8)
30554 .k(2)
30555 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030556 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030557 }
30558
30559 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile) {
30560 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080030561 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030562 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030563 GemmMicrokernelTester()
30564 .mr(4)
30565 .nr(8)
30566 .kr(1)
30567 .sr(1)
30568 .m(m)
30569 .n(n)
30570 .k(2)
30571 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030572 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030573 }
30574 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030575 }
30576
30577 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile_m) {
30578 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030579 for (uint32_t m = 1; m <= 4; m++) {
30580 GemmMicrokernelTester()
30581 .mr(4)
30582 .nr(8)
30583 .kr(1)
30584 .sr(1)
30585 .m(m)
30586 .n(8)
30587 .k(2)
30588 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030589 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030590 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030591 }
30592
30593 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_eq_2_subtile_n) {
30594 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030595 for (uint32_t n = 1; n <= 8; n++) {
30596 GemmMicrokernelTester()
30597 .mr(4)
30598 .nr(8)
30599 .kr(1)
30600 .sr(1)
30601 .m(4)
30602 .n(n)
30603 .k(2)
30604 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030605 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030606 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030607 }
30608
30609 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_lt_2) {
30610 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030611 for (size_t k = 1; k < 2; k++) {
30612 GemmMicrokernelTester()
30613 .mr(4)
30614 .nr(8)
30615 .kr(1)
30616 .sr(1)
30617 .m(4)
30618 .n(8)
30619 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030620 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030621 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030622 }
30623
30624 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_lt_2_subtile) {
30625 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030626 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030627 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030628 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030629 GemmMicrokernelTester()
30630 .mr(4)
30631 .nr(8)
30632 .kr(1)
30633 .sr(1)
30634 .m(m)
30635 .n(n)
30636 .k(k)
30637 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030638 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030639 }
30640 }
30641 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030642 }
30643
30644 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_gt_2) {
30645 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030646 for (size_t k = 3; k < 4; k++) {
30647 GemmMicrokernelTester()
30648 .mr(4)
30649 .nr(8)
30650 .kr(1)
30651 .sr(1)
30652 .m(4)
30653 .n(8)
30654 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030655 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030656 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030657 }
30658
30659 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_gt_2_subtile) {
30660 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030661 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030662 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030663 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030664 GemmMicrokernelTester()
30665 .mr(4)
30666 .nr(8)
30667 .kr(1)
30668 .sr(1)
30669 .m(m)
30670 .n(n)
30671 .k(k)
30672 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030673 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030674 }
30675 }
30676 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030677 }
30678
30679 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_div_2) {
30680 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030681 for (size_t k = 4; k <= 20; k += 2) {
30682 GemmMicrokernelTester()
30683 .mr(4)
30684 .nr(8)
30685 .kr(1)
30686 .sr(1)
30687 .m(4)
30688 .n(8)
30689 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030690 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030691 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030692 }
30693
30694 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, k_div_2_subtile) {
30695 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030696 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030697 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030698 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030699 GemmMicrokernelTester()
30700 .mr(4)
30701 .nr(8)
30702 .kr(1)
30703 .sr(1)
30704 .m(m)
30705 .n(n)
30706 .k(k)
30707 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030708 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030709 }
30710 }
30711 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030712 }
30713
30714 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8) {
30715 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030716 for (uint32_t n = 9; n < 16; n++) {
30717 for (size_t k = 1; k <= 10; k += 3) {
30718 GemmMicrokernelTester()
30719 .mr(4)
30720 .nr(8)
30721 .kr(1)
30722 .sr(1)
30723 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030724 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030725 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030726 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030727 }
30728 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030729 }
30730
30731 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_strided_cn) {
30732 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030733 for (uint32_t n = 9; n < 16; n++) {
30734 for (size_t k = 1; k <= 10; k += 3) {
30735 GemmMicrokernelTester()
30736 .mr(4)
30737 .nr(8)
30738 .kr(1)
30739 .sr(1)
30740 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030741 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030742 .k(k)
30743 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030744 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030745 }
30746 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030747 }
30748
30749 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_subtile) {
30750 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030751 for (uint32_t n = 9; n < 16; n++) {
30752 for (size_t k = 1; k <= 10; k += 3) {
30753 for (uint32_t m = 1; m <= 4; m++) {
30754 GemmMicrokernelTester()
30755 .mr(4)
30756 .nr(8)
30757 .kr(1)
30758 .sr(1)
30759 .m(m)
30760 .n(n)
30761 .k(k)
30762 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030763 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030764 }
30765 }
30766 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030767 }
30768
30769 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8) {
30770 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030771 for (uint32_t n = 16; n <= 24; n += 8) {
30772 for (size_t k = 1; k <= 10; k += 3) {
30773 GemmMicrokernelTester()
30774 .mr(4)
30775 .nr(8)
30776 .kr(1)
30777 .sr(1)
30778 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030779 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030780 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030781 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030782 }
30783 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030784 }
30785
30786 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_strided_cn) {
30787 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030788 for (uint32_t n = 16; n <= 24; n += 8) {
30789 for (size_t k = 1; k <= 10; k += 3) {
30790 GemmMicrokernelTester()
30791 .mr(4)
30792 .nr(8)
30793 .kr(1)
30794 .sr(1)
30795 .m(4)
30796 .n(n)
30797 .k(k)
30798 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030799 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030800 }
30801 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030802 }
30803
30804 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_subtile) {
30805 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030806 for (uint32_t n = 16; n <= 24; n += 8) {
30807 for (size_t k = 1; k <= 10; k += 3) {
30808 for (uint32_t m = 1; m <= 4; m++) {
30809 GemmMicrokernelTester()
30810 .mr(4)
30811 .nr(8)
30812 .kr(1)
30813 .sr(1)
30814 .m(m)
30815 .n(n)
30816 .k(k)
30817 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030818 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030819 }
30820 }
30821 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030822 }
30823
30824 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, small_kernel) {
30825 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030826 for (size_t k = 1; k <= 10; k += 3) {
30827 GemmMicrokernelTester()
30828 .mr(4)
30829 .nr(8)
30830 .kr(1)
30831 .sr(1)
30832 .m(4)
30833 .n(8)
30834 .k(k)
30835 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030836 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030837 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030838 }
30839
30840 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, small_kernel_subtile) {
30841 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030842 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030843 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030844 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030845 GemmMicrokernelTester()
30846 .mr(4)
30847 .nr(8)
30848 .kr(1)
30849 .sr(1)
30850 .m(m)
30851 .n(n)
30852 .k(k)
30853 .ks(3)
30854 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030855 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030856 }
30857 }
30858 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030859 }
30860
30861 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_gt_8_small_kernel) {
30862 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030863 for (uint32_t n = 9; n < 16; n++) {
30864 for (size_t k = 1; k <= 10; k += 3) {
30865 GemmMicrokernelTester()
30866 .mr(4)
30867 .nr(8)
30868 .kr(1)
30869 .sr(1)
30870 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030871 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030872 .k(k)
30873 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030874 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030875 }
30876 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030877 }
30878
30879 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, n_div_8_small_kernel) {
30880 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030881 for (uint32_t n = 16; n <= 24; n += 8) {
30882 for (size_t k = 1; k <= 10; k += 3) {
30883 GemmMicrokernelTester()
30884 .mr(4)
30885 .nr(8)
30886 .kr(1)
30887 .sr(1)
30888 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030889 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030890 .k(k)
30891 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030892 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030893 }
30894 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030895 }
30896
30897 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, strided_cm_subtile) {
30898 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030899 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030900 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030901 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030902 GemmMicrokernelTester()
30903 .mr(4)
30904 .nr(8)
30905 .kr(1)
30906 .sr(1)
30907 .m(m)
30908 .n(n)
30909 .k(k)
30910 .cm_stride(11)
30911 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030912 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030913 }
30914 }
30915 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030916 }
30917
30918 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, a_offset) {
30919 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030920 for (size_t k = 1; k <= 10; k += 3) {
30921 GemmMicrokernelTester()
30922 .mr(4)
30923 .nr(8)
30924 .kr(1)
30925 .sr(1)
30926 .m(4)
30927 .n(8)
30928 .k(k)
30929 .ks(3)
30930 .a_offset(43)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030931 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030932 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030933 }
30934
30935 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, zero) {
30936 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080030937 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030938 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030939 GemmMicrokernelTester()
30940 .mr(4)
30941 .nr(8)
30942 .kr(1)
30943 .sr(1)
30944 .m(4)
30945 .n(8)
30946 .k(k)
30947 .ks(3)
30948 .a_offset(43)
30949 .zero_index(mz)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030950 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030951 }
30952 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030953 }
30954
30955 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, qmin) {
30956 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030957 GemmMicrokernelTester()
30958 .mr(4)
30959 .nr(8)
30960 .kr(1)
30961 .sr(1)
30962 .m(4)
30963 .n(8)
30964 .k(2)
30965 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030966 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030967 }
30968
30969 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, qmax) {
30970 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030971 GemmMicrokernelTester()
30972 .mr(4)
30973 .nr(8)
30974 .kr(1)
30975 .sr(1)
30976 .m(4)
30977 .n(8)
30978 .k(2)
30979 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030980 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030981 }
30982
30983 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A7, strided_cm) {
30984 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030985 GemmMicrokernelTester()
30986 .mr(4)
30987 .nr(8)
30988 .kr(1)
30989 .sr(1)
30990 .m(4)
30991 .n(8)
30992 .k(2)
30993 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080030994 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a7, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080030995 }
30996#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
30997
30998
30999#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
31000 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4) {
31001 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031002 GemmMicrokernelTester()
31003 .mr(4)
31004 .nr(8)
31005 .kr(1)
31006 .sr(1)
31007 .m(4)
31008 .n(8)
31009 .k(4)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031010 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031011 }
31012
31013 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, strided_cn) {
31014 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031015 GemmMicrokernelTester()
31016 .mr(4)
31017 .nr(8)
31018 .kr(1)
31019 .sr(1)
31020 .m(4)
31021 .n(8)
31022 .k(4)
31023 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031024 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031025 }
31026
31027 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile) {
31028 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080031029 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031030 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031031 GemmMicrokernelTester()
31032 .mr(4)
31033 .nr(8)
31034 .kr(1)
31035 .sr(1)
31036 .m(m)
31037 .n(n)
31038 .k(4)
31039 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031040 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031041 }
31042 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031043 }
31044
31045 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_m) {
31046 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031047 for (uint32_t m = 1; m <= 4; m++) {
31048 GemmMicrokernelTester()
31049 .mr(4)
31050 .nr(8)
31051 .kr(1)
31052 .sr(1)
31053 .m(m)
31054 .n(8)
31055 .k(4)
31056 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031057 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031058 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031059 }
31060
31061 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_4_subtile_n) {
31062 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031063 for (uint32_t n = 1; n <= 8; n++) {
31064 GemmMicrokernelTester()
31065 .mr(4)
31066 .nr(8)
31067 .kr(1)
31068 .sr(1)
31069 .m(4)
31070 .n(n)
31071 .k(4)
31072 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031073 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031074 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031075 }
31076
31077 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8) {
31078 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031079 GemmMicrokernelTester()
31080 .mr(4)
31081 .nr(8)
31082 .kr(1)
31083 .sr(1)
31084 .m(4)
31085 .n(8)
31086 .k(8)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031087 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031088 }
31089
31090 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_eq_8_subtile) {
31091 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080031092 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031093 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031094 GemmMicrokernelTester()
31095 .mr(4)
31096 .nr(8)
31097 .kr(1)
31098 .sr(1)
31099 .m(m)
31100 .n(n)
31101 .k(8)
31102 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031103 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031104 }
31105 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031106 }
31107
31108 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8) {
31109 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031110 for (size_t k = 1; k < 8; k++) {
31111 GemmMicrokernelTester()
31112 .mr(4)
31113 .nr(8)
31114 .kr(1)
31115 .sr(1)
31116 .m(4)
31117 .n(8)
31118 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031119 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031120 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031121 }
31122
31123 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_lt_8_subtile) {
31124 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031125 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031126 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031127 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031128 GemmMicrokernelTester()
31129 .mr(4)
31130 .nr(8)
31131 .kr(1)
31132 .sr(1)
31133 .m(m)
31134 .n(n)
31135 .k(k)
31136 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031137 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031138 }
31139 }
31140 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031141 }
31142
31143 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8) {
31144 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031145 for (size_t k = 9; k < 16; k++) {
31146 GemmMicrokernelTester()
31147 .mr(4)
31148 .nr(8)
31149 .kr(1)
31150 .sr(1)
31151 .m(4)
31152 .n(8)
31153 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031154 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031155 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031156 }
31157
31158 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_gt_8_subtile) {
31159 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031160 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031161 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031162 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031163 GemmMicrokernelTester()
31164 .mr(4)
31165 .nr(8)
31166 .kr(1)
31167 .sr(1)
31168 .m(m)
31169 .n(n)
31170 .k(k)
31171 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031172 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031173 }
31174 }
31175 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031176 }
31177
31178 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_div_4) {
31179 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031180 for (size_t k = 12; k <= 40; k += 4) {
31181 GemmMicrokernelTester()
31182 .mr(4)
31183 .nr(8)
31184 .kr(1)
31185 .sr(1)
31186 .m(4)
31187 .n(8)
31188 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031189 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031190 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031191 }
31192
31193 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, k_div_4_subtile) {
31194 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031195 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031196 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031197 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031198 GemmMicrokernelTester()
31199 .mr(4)
31200 .nr(8)
31201 .kr(1)
31202 .sr(1)
31203 .m(m)
31204 .n(n)
31205 .k(k)
31206 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031207 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031208 }
31209 }
31210 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031211 }
31212
31213 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8) {
31214 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031215 for (uint32_t n = 9; n < 16; n++) {
31216 for (size_t k = 1; k <= 20; k += 5) {
31217 GemmMicrokernelTester()
31218 .mr(4)
31219 .nr(8)
31220 .kr(1)
31221 .sr(1)
31222 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031223 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031224 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031225 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031226 }
31227 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031228 }
31229
31230 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_strided_cn) {
31231 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031232 for (uint32_t n = 9; n < 16; n++) {
31233 for (size_t k = 1; k <= 20; k += 5) {
31234 GemmMicrokernelTester()
31235 .mr(4)
31236 .nr(8)
31237 .kr(1)
31238 .sr(1)
31239 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031240 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031241 .k(k)
31242 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031243 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031244 }
31245 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031246 }
31247
31248 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_subtile) {
31249 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031250 for (uint32_t n = 9; n < 16; n++) {
31251 for (size_t k = 1; k <= 20; k += 5) {
31252 for (uint32_t m = 1; m <= 4; m++) {
31253 GemmMicrokernelTester()
31254 .mr(4)
31255 .nr(8)
31256 .kr(1)
31257 .sr(1)
31258 .m(m)
31259 .n(n)
31260 .k(k)
31261 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031262 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031263 }
31264 }
31265 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031266 }
31267
31268 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8) {
31269 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031270 for (uint32_t n = 16; n <= 24; n += 8) {
31271 for (size_t k = 1; k <= 20; k += 5) {
31272 GemmMicrokernelTester()
31273 .mr(4)
31274 .nr(8)
31275 .kr(1)
31276 .sr(1)
31277 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031278 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031279 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031280 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031281 }
31282 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031283 }
31284
31285 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_strided_cn) {
31286 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031287 for (uint32_t n = 16; n <= 24; n += 8) {
31288 for (size_t k = 1; k <= 20; k += 5) {
31289 GemmMicrokernelTester()
31290 .mr(4)
31291 .nr(8)
31292 .kr(1)
31293 .sr(1)
31294 .m(4)
31295 .n(n)
31296 .k(k)
31297 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031298 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031299 }
31300 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031301 }
31302
31303 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_subtile) {
31304 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031305 for (uint32_t n = 16; n <= 24; n += 8) {
31306 for (size_t k = 1; k <= 20; k += 5) {
31307 for (uint32_t m = 1; m <= 4; m++) {
31308 GemmMicrokernelTester()
31309 .mr(4)
31310 .nr(8)
31311 .kr(1)
31312 .sr(1)
31313 .m(m)
31314 .n(n)
31315 .k(k)
31316 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031317 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031318 }
31319 }
31320 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031321 }
31322
31323 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, small_kernel) {
31324 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031325 for (size_t k = 1; k <= 20; k += 5) {
31326 GemmMicrokernelTester()
31327 .mr(4)
31328 .nr(8)
31329 .kr(1)
31330 .sr(1)
31331 .m(4)
31332 .n(8)
31333 .k(k)
31334 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031335 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031336 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031337 }
31338
31339 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, small_kernel_subtile) {
31340 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031341 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031342 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031343 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031344 GemmMicrokernelTester()
31345 .mr(4)
31346 .nr(8)
31347 .kr(1)
31348 .sr(1)
31349 .m(m)
31350 .n(n)
31351 .k(k)
31352 .ks(3)
31353 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031354 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031355 }
31356 }
31357 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031358 }
31359
31360 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_gt_8_small_kernel) {
31361 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031362 for (uint32_t n = 9; n < 16; n++) {
31363 for (size_t k = 1; k <= 20; k += 5) {
31364 GemmMicrokernelTester()
31365 .mr(4)
31366 .nr(8)
31367 .kr(1)
31368 .sr(1)
31369 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031370 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031371 .k(k)
31372 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031373 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031374 }
31375 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031376 }
31377
31378 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, n_div_8_small_kernel) {
31379 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031380 for (uint32_t n = 16; n <= 24; n += 8) {
31381 for (size_t k = 1; k <= 20; k += 5) {
31382 GemmMicrokernelTester()
31383 .mr(4)
31384 .nr(8)
31385 .kr(1)
31386 .sr(1)
31387 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031388 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031389 .k(k)
31390 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031391 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031392 }
31393 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031394 }
31395
31396 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, strided_cm_subtile) {
31397 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031398 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031399 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031400 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031401 GemmMicrokernelTester()
31402 .mr(4)
31403 .nr(8)
31404 .kr(1)
31405 .sr(1)
31406 .m(m)
31407 .n(n)
31408 .k(k)
31409 .cm_stride(11)
31410 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031411 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031412 }
31413 }
31414 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031415 }
31416
31417 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, a_offset) {
31418 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031419 for (size_t k = 1; k <= 20; k += 5) {
31420 GemmMicrokernelTester()
31421 .mr(4)
31422 .nr(8)
31423 .kr(1)
31424 .sr(1)
31425 .m(4)
31426 .n(8)
31427 .k(k)
31428 .ks(3)
31429 .a_offset(83)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031430 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031431 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031432 }
31433
31434 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, zero) {
31435 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080031436 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031437 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031438 GemmMicrokernelTester()
31439 .mr(4)
31440 .nr(8)
31441 .kr(1)
31442 .sr(1)
31443 .m(4)
31444 .n(8)
31445 .k(k)
31446 .ks(3)
31447 .a_offset(83)
31448 .zero_index(mz)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031449 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031450 }
31451 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031452 }
31453
31454 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, qmin) {
31455 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031456 GemmMicrokernelTester()
31457 .mr(4)
31458 .nr(8)
31459 .kr(1)
31460 .sr(1)
31461 .m(4)
31462 .n(8)
31463 .k(4)
31464 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031465 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031466 }
31467
31468 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, qmax) {
31469 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031470 GemmMicrokernelTester()
31471 .mr(4)
31472 .nr(8)
31473 .kr(1)
31474 .sr(1)
31475 .m(4)
31476 .n(8)
31477 .k(4)
31478 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031479 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031480 }
31481
31482 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A75, strided_cm) {
31483 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031484 GemmMicrokernelTester()
31485 .mr(4)
31486 .nr(8)
31487 .kr(1)
31488 .sr(1)
31489 .m(4)
31490 .n(8)
31491 .k(4)
31492 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031493 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031494 }
31495#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
31496
31497
31498#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031499 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4) {
31500 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031501 GemmMicrokernelTester()
31502 .mr(4)
31503 .nr(8)
31504 .kr(1)
31505 .sr(1)
31506 .m(4)
31507 .n(8)
31508 .k(4)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031509 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031510 }
31511
31512 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, strided_cn) {
31513 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031514 GemmMicrokernelTester()
31515 .mr(4)
31516 .nr(8)
31517 .kr(1)
31518 .sr(1)
31519 .m(4)
31520 .n(8)
31521 .k(4)
31522 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031523 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031524 }
31525
31526 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile) {
31527 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080031528 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031529 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031530 GemmMicrokernelTester()
31531 .mr(4)
31532 .nr(8)
31533 .kr(1)
31534 .sr(1)
31535 .m(m)
31536 .n(n)
31537 .k(4)
31538 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031539 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031540 }
31541 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031542 }
31543
31544 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_m) {
31545 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031546 for (uint32_t m = 1; m <= 4; m++) {
31547 GemmMicrokernelTester()
31548 .mr(4)
31549 .nr(8)
31550 .kr(1)
31551 .sr(1)
31552 .m(m)
31553 .n(8)
31554 .k(4)
31555 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031556 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031557 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031558 }
31559
31560 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_4_subtile_n) {
31561 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031562 for (uint32_t n = 1; n <= 8; n++) {
31563 GemmMicrokernelTester()
31564 .mr(4)
31565 .nr(8)
31566 .kr(1)
31567 .sr(1)
31568 .m(4)
31569 .n(n)
31570 .k(4)
31571 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031572 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031573 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031574 }
31575
31576 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8) {
31577 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031578 GemmMicrokernelTester()
31579 .mr(4)
31580 .nr(8)
31581 .kr(1)
31582 .sr(1)
31583 .m(4)
31584 .n(8)
31585 .k(8)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031586 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031587 }
31588
31589 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_eq_8_subtile) {
31590 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080031591 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031592 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031593 GemmMicrokernelTester()
31594 .mr(4)
31595 .nr(8)
31596 .kr(1)
31597 .sr(1)
31598 .m(m)
31599 .n(n)
31600 .k(8)
31601 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031602 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031603 }
31604 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031605 }
31606
31607 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8) {
31608 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031609 for (size_t k = 1; k < 8; k++) {
31610 GemmMicrokernelTester()
31611 .mr(4)
31612 .nr(8)
31613 .kr(1)
31614 .sr(1)
31615 .m(4)
31616 .n(8)
31617 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031618 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031619 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031620 }
31621
31622 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_lt_8_subtile) {
31623 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031624 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031625 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031626 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031627 GemmMicrokernelTester()
31628 .mr(4)
31629 .nr(8)
31630 .kr(1)
31631 .sr(1)
31632 .m(m)
31633 .n(n)
31634 .k(k)
31635 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031636 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031637 }
31638 }
31639 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031640 }
31641
31642 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8) {
31643 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031644 for (size_t k = 9; k < 16; k++) {
31645 GemmMicrokernelTester()
31646 .mr(4)
31647 .nr(8)
31648 .kr(1)
31649 .sr(1)
31650 .m(4)
31651 .n(8)
31652 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031653 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031654 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031655 }
31656
31657 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_gt_8_subtile) {
31658 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031659 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031660 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031661 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031662 GemmMicrokernelTester()
31663 .mr(4)
31664 .nr(8)
31665 .kr(1)
31666 .sr(1)
31667 .m(m)
31668 .n(n)
31669 .k(k)
31670 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031671 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031672 }
31673 }
31674 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031675 }
31676
31677 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_div_4) {
31678 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031679 for (size_t k = 12; k <= 40; k += 4) {
31680 GemmMicrokernelTester()
31681 .mr(4)
31682 .nr(8)
31683 .kr(1)
31684 .sr(1)
31685 .m(4)
31686 .n(8)
31687 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031688 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031689 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031690 }
31691
31692 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, k_div_4_subtile) {
31693 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031694 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031695 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031696 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031697 GemmMicrokernelTester()
31698 .mr(4)
31699 .nr(8)
31700 .kr(1)
31701 .sr(1)
31702 .m(m)
31703 .n(n)
31704 .k(k)
31705 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031706 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031707 }
31708 }
31709 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031710 }
31711
31712 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8) {
31713 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031714 for (uint32_t n = 9; n < 16; n++) {
31715 for (size_t k = 1; k <= 20; k += 5) {
31716 GemmMicrokernelTester()
31717 .mr(4)
31718 .nr(8)
31719 .kr(1)
31720 .sr(1)
31721 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031722 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031723 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031724 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031725 }
31726 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031727 }
31728
31729 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_strided_cn) {
31730 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031731 for (uint32_t n = 9; n < 16; n++) {
31732 for (size_t k = 1; k <= 20; k += 5) {
31733 GemmMicrokernelTester()
31734 .mr(4)
31735 .nr(8)
31736 .kr(1)
31737 .sr(1)
31738 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031739 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031740 .k(k)
31741 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031742 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031743 }
31744 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031745 }
31746
31747 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_subtile) {
31748 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031749 for (uint32_t n = 9; n < 16; n++) {
31750 for (size_t k = 1; k <= 20; k += 5) {
31751 for (uint32_t m = 1; m <= 4; m++) {
31752 GemmMicrokernelTester()
31753 .mr(4)
31754 .nr(8)
31755 .kr(1)
31756 .sr(1)
31757 .m(m)
31758 .n(n)
31759 .k(k)
31760 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031761 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031762 }
31763 }
31764 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031765 }
31766
31767 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8) {
31768 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031769 for (uint32_t n = 16; n <= 24; n += 8) {
31770 for (size_t k = 1; k <= 20; k += 5) {
31771 GemmMicrokernelTester()
31772 .mr(4)
31773 .nr(8)
31774 .kr(1)
31775 .sr(1)
31776 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031777 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031778 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031779 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031780 }
31781 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031782 }
31783
31784 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_strided_cn) {
31785 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031786 for (uint32_t n = 16; n <= 24; n += 8) {
31787 for (size_t k = 1; k <= 20; k += 5) {
31788 GemmMicrokernelTester()
31789 .mr(4)
31790 .nr(8)
31791 .kr(1)
31792 .sr(1)
31793 .m(4)
31794 .n(n)
31795 .k(k)
31796 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031797 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031798 }
31799 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031800 }
31801
31802 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_subtile) {
31803 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031804 for (uint32_t n = 16; n <= 24; n += 8) {
31805 for (size_t k = 1; k <= 20; k += 5) {
31806 for (uint32_t m = 1; m <= 4; m++) {
31807 GemmMicrokernelTester()
31808 .mr(4)
31809 .nr(8)
31810 .kr(1)
31811 .sr(1)
31812 .m(m)
31813 .n(n)
31814 .k(k)
31815 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031816 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031817 }
31818 }
31819 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031820 }
31821
31822 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, small_kernel) {
31823 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031824 for (size_t k = 1; k <= 20; k += 5) {
31825 GemmMicrokernelTester()
31826 .mr(4)
31827 .nr(8)
31828 .kr(1)
31829 .sr(1)
31830 .m(4)
31831 .n(8)
31832 .k(k)
31833 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031834 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031835 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031836 }
31837
31838 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, small_kernel_subtile) {
31839 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031840 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031841 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031842 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031843 GemmMicrokernelTester()
31844 .mr(4)
31845 .nr(8)
31846 .kr(1)
31847 .sr(1)
31848 .m(m)
31849 .n(n)
31850 .k(k)
31851 .ks(3)
31852 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031853 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031854 }
31855 }
31856 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031857 }
31858
31859 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_gt_8_small_kernel) {
31860 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031861 for (uint32_t n = 9; n < 16; n++) {
31862 for (size_t k = 1; k <= 20; k += 5) {
31863 GemmMicrokernelTester()
31864 .mr(4)
31865 .nr(8)
31866 .kr(1)
31867 .sr(1)
31868 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031869 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031870 .k(k)
31871 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031872 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031873 }
31874 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031875 }
31876
31877 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, n_div_8_small_kernel) {
31878 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031879 for (uint32_t n = 16; n <= 24; n += 8) {
31880 for (size_t k = 1; k <= 20; k += 5) {
31881 GemmMicrokernelTester()
31882 .mr(4)
31883 .nr(8)
31884 .kr(1)
31885 .sr(1)
31886 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031887 .n(n)
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031888 .k(k)
31889 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031890 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031891 }
31892 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031893 }
31894
31895 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, strided_cm_subtile) {
31896 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031897 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031898 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031899 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031900 GemmMicrokernelTester()
31901 .mr(4)
31902 .nr(8)
31903 .kr(1)
31904 .sr(1)
31905 .m(m)
31906 .n(n)
31907 .k(k)
31908 .cm_stride(11)
31909 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031910 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031911 }
31912 }
31913 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031914 }
31915
31916 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, a_offset) {
31917 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031918 for (size_t k = 1; k <= 20; k += 5) {
31919 GemmMicrokernelTester()
31920 .mr(4)
31921 .nr(8)
31922 .kr(1)
31923 .sr(1)
31924 .m(4)
31925 .n(8)
31926 .k(k)
31927 .ks(3)
31928 .a_offset(83)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031929 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031930 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031931 }
31932
31933 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, zero) {
31934 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080031935 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031936 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031937 GemmMicrokernelTester()
31938 .mr(4)
31939 .nr(8)
31940 .kr(1)
31941 .sr(1)
31942 .m(4)
31943 .n(8)
31944 .k(k)
31945 .ks(3)
31946 .a_offset(83)
31947 .zero_index(mz)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031948 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031949 }
31950 }
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031951 }
31952
31953 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, qmin) {
31954 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031955 GemmMicrokernelTester()
31956 .mr(4)
31957 .nr(8)
31958 .kr(1)
31959 .sr(1)
31960 .m(4)
31961 .n(8)
31962 .k(4)
31963 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031964 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031965 }
31966
31967 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, qmax) {
31968 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031969 GemmMicrokernelTester()
31970 .mr(4)
31971 .nr(8)
31972 .kr(1)
31973 .sr(1)
31974 .m(4)
31975 .n(8)
31976 .k(4)
31977 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031978 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031979 }
31980
31981 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A55, strided_cm) {
31982 TEST_REQUIRES_ARM_NEON;
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031983 GemmMicrokernelTester()
31984 .mr(4)
31985 .nr(8)
31986 .kr(1)
31987 .sr(1)
31988 .m(4)
31989 .n(8)
31990 .k(4)
31991 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080031992 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a55, xnn_init_f32_minmax_scalar_params);
Zhi An Ng13b57dd2022-01-06 09:33:20 -080031993 }
31994#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
Zhi An Ng6b72e6c2022-02-03 11:16:27 -080031995
31996
31997#if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
31998 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
31999 TEST_REQUIRES_ARM_NEON_FMA;
32000 GemmMicrokernelTester()
32001 .mr(6)
32002 .nr(8)
32003 .kr(1)
32004 .sr(1)
32005 .m(6)
32006 .n(8)
32007 .k(8)
32008 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32009 }
32010
32011 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
32012 TEST_REQUIRES_ARM_NEON_FMA;
32013 GemmMicrokernelTester()
32014 .mr(6)
32015 .nr(8)
32016 .kr(1)
32017 .sr(1)
32018 .m(6)
32019 .n(8)
32020 .k(8)
32021 .cn_stride(11)
32022 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32023 }
32024
32025 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
32026 TEST_REQUIRES_ARM_NEON_FMA;
32027 for (uint32_t n = 1; n <= 8; n++) {
32028 for (uint32_t m = 1; m <= 6; m++) {
32029 GemmMicrokernelTester()
32030 .mr(6)
32031 .nr(8)
32032 .kr(1)
32033 .sr(1)
32034 .m(m)
32035 .n(n)
32036 .k(8)
32037 .iterations(1)
32038 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32039 }
32040 }
32041 }
32042
32043 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
32044 TEST_REQUIRES_ARM_NEON_FMA;
32045 for (uint32_t m = 1; m <= 6; m++) {
32046 GemmMicrokernelTester()
32047 .mr(6)
32048 .nr(8)
32049 .kr(1)
32050 .sr(1)
32051 .m(m)
32052 .n(8)
32053 .k(8)
32054 .iterations(1)
32055 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32056 }
32057 }
32058
32059 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
32060 TEST_REQUIRES_ARM_NEON_FMA;
32061 for (uint32_t n = 1; n <= 8; n++) {
32062 GemmMicrokernelTester()
32063 .mr(6)
32064 .nr(8)
32065 .kr(1)
32066 .sr(1)
32067 .m(6)
32068 .n(n)
32069 .k(8)
32070 .iterations(1)
32071 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32072 }
32073 }
32074
32075 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
32076 TEST_REQUIRES_ARM_NEON_FMA;
32077 GemmMicrokernelTester()
32078 .mr(6)
32079 .nr(8)
32080 .kr(1)
32081 .sr(1)
32082 .m(6)
32083 .n(8)
32084 .k(16)
32085 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32086 }
32087
32088 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
32089 TEST_REQUIRES_ARM_NEON_FMA;
32090 for (uint32_t n = 1; n <= 8; n++) {
32091 for (uint32_t m = 1; m <= 6; m++) {
32092 GemmMicrokernelTester()
32093 .mr(6)
32094 .nr(8)
32095 .kr(1)
32096 .sr(1)
32097 .m(m)
32098 .n(n)
32099 .k(16)
32100 .iterations(1)
32101 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32102 }
32103 }
32104 }
32105
32106 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
32107 TEST_REQUIRES_ARM_NEON_FMA;
32108 for (size_t k = 1; k < 16; k++) {
32109 GemmMicrokernelTester()
32110 .mr(6)
32111 .nr(8)
32112 .kr(1)
32113 .sr(1)
32114 .m(6)
32115 .n(8)
32116 .k(k)
32117 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32118 }
32119 }
32120
32121 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
32122 TEST_REQUIRES_ARM_NEON_FMA;
32123 for (size_t k = 1; k < 16; k++) {
32124 for (uint32_t n = 1; n <= 8; n++) {
32125 for (uint32_t m = 1; m <= 6; m++) {
32126 GemmMicrokernelTester()
32127 .mr(6)
32128 .nr(8)
32129 .kr(1)
32130 .sr(1)
32131 .m(m)
32132 .n(n)
32133 .k(k)
32134 .iterations(1)
32135 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32136 }
32137 }
32138 }
32139 }
32140
32141 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
32142 TEST_REQUIRES_ARM_NEON_FMA;
32143 for (size_t k = 17; k < 32; k++) {
32144 GemmMicrokernelTester()
32145 .mr(6)
32146 .nr(8)
32147 .kr(1)
32148 .sr(1)
32149 .m(6)
32150 .n(8)
32151 .k(k)
32152 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32153 }
32154 }
32155
32156 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
32157 TEST_REQUIRES_ARM_NEON_FMA;
32158 for (size_t k = 17; k < 32; k++) {
32159 for (uint32_t n = 1; n <= 8; n++) {
32160 for (uint32_t m = 1; m <= 6; m++) {
32161 GemmMicrokernelTester()
32162 .mr(6)
32163 .nr(8)
32164 .kr(1)
32165 .sr(1)
32166 .m(m)
32167 .n(n)
32168 .k(k)
32169 .iterations(1)
32170 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32171 }
32172 }
32173 }
32174 }
32175
32176 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
32177 TEST_REQUIRES_ARM_NEON_FMA;
32178 for (size_t k = 24; k <= 80; k += 8) {
32179 GemmMicrokernelTester()
32180 .mr(6)
32181 .nr(8)
32182 .kr(1)
32183 .sr(1)
32184 .m(6)
32185 .n(8)
32186 .k(k)
32187 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32188 }
32189 }
32190
32191 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
32192 TEST_REQUIRES_ARM_NEON_FMA;
32193 for (size_t k = 24; k <= 80; k += 8) {
32194 for (uint32_t n = 1; n <= 8; n++) {
32195 for (uint32_t m = 1; m <= 6; m++) {
32196 GemmMicrokernelTester()
32197 .mr(6)
32198 .nr(8)
32199 .kr(1)
32200 .sr(1)
32201 .m(m)
32202 .n(n)
32203 .k(k)
32204 .iterations(1)
32205 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32206 }
32207 }
32208 }
32209 }
32210
32211 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
32212 TEST_REQUIRES_ARM_NEON_FMA;
32213 for (uint32_t n = 9; n < 16; n++) {
32214 for (size_t k = 1; k <= 40; k += 9) {
32215 GemmMicrokernelTester()
32216 .mr(6)
32217 .nr(8)
32218 .kr(1)
32219 .sr(1)
32220 .m(6)
32221 .n(n)
32222 .k(k)
32223 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32224 }
32225 }
32226 }
32227
32228 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
32229 TEST_REQUIRES_ARM_NEON_FMA;
32230 for (uint32_t n = 9; n < 16; n++) {
32231 for (size_t k = 1; k <= 40; k += 9) {
32232 GemmMicrokernelTester()
32233 .mr(6)
32234 .nr(8)
32235 .kr(1)
32236 .sr(1)
32237 .m(6)
32238 .n(n)
32239 .k(k)
32240 .cn_stride(11)
32241 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32242 }
32243 }
32244 }
32245
32246 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
32247 TEST_REQUIRES_ARM_NEON_FMA;
32248 for (uint32_t n = 9; n < 16; n++) {
32249 for (size_t k = 1; k <= 40; k += 9) {
32250 for (uint32_t m = 1; m <= 6; m++) {
32251 GemmMicrokernelTester()
32252 .mr(6)
32253 .nr(8)
32254 .kr(1)
32255 .sr(1)
32256 .m(m)
32257 .n(n)
32258 .k(k)
32259 .iterations(1)
32260 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32261 }
32262 }
32263 }
32264 }
32265
32266 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
32267 TEST_REQUIRES_ARM_NEON_FMA;
32268 for (uint32_t n = 16; n <= 24; n += 8) {
32269 for (size_t k = 1; k <= 40; k += 9) {
32270 GemmMicrokernelTester()
32271 .mr(6)
32272 .nr(8)
32273 .kr(1)
32274 .sr(1)
32275 .m(6)
32276 .n(n)
32277 .k(k)
32278 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32279 }
32280 }
32281 }
32282
32283 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
32284 TEST_REQUIRES_ARM_NEON_FMA;
32285 for (uint32_t n = 16; n <= 24; n += 8) {
32286 for (size_t k = 1; k <= 40; k += 9) {
32287 GemmMicrokernelTester()
32288 .mr(6)
32289 .nr(8)
32290 .kr(1)
32291 .sr(1)
32292 .m(6)
32293 .n(n)
32294 .k(k)
32295 .cn_stride(11)
32296 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32297 }
32298 }
32299 }
32300
32301 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
32302 TEST_REQUIRES_ARM_NEON_FMA;
32303 for (uint32_t n = 16; n <= 24; n += 8) {
32304 for (size_t k = 1; k <= 40; k += 9) {
32305 for (uint32_t m = 1; m <= 6; m++) {
32306 GemmMicrokernelTester()
32307 .mr(6)
32308 .nr(8)
32309 .kr(1)
32310 .sr(1)
32311 .m(m)
32312 .n(n)
32313 .k(k)
32314 .iterations(1)
32315 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32316 }
32317 }
32318 }
32319 }
32320
32321 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
32322 TEST_REQUIRES_ARM_NEON_FMA;
32323 for (size_t k = 1; k <= 40; k += 9) {
32324 GemmMicrokernelTester()
32325 .mr(6)
32326 .nr(8)
32327 .kr(1)
32328 .sr(1)
32329 .m(6)
32330 .n(8)
32331 .k(k)
32332 .ks(3)
32333 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32334 }
32335 }
32336
32337 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
32338 TEST_REQUIRES_ARM_NEON_FMA;
32339 for (size_t k = 1; k <= 40; k += 9) {
32340 for (uint32_t n = 1; n <= 8; n++) {
32341 for (uint32_t m = 1; m <= 6; m++) {
32342 GemmMicrokernelTester()
32343 .mr(6)
32344 .nr(8)
32345 .kr(1)
32346 .sr(1)
32347 .m(m)
32348 .n(n)
32349 .k(k)
32350 .ks(3)
32351 .iterations(1)
32352 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32353 }
32354 }
32355 }
32356 }
32357
32358 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
32359 TEST_REQUIRES_ARM_NEON_FMA;
32360 for (uint32_t n = 9; n < 16; n++) {
32361 for (size_t k = 1; k <= 40; k += 9) {
32362 GemmMicrokernelTester()
32363 .mr(6)
32364 .nr(8)
32365 .kr(1)
32366 .sr(1)
32367 .m(6)
32368 .n(n)
32369 .k(k)
32370 .ks(3)
32371 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32372 }
32373 }
32374 }
32375
32376 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
32377 TEST_REQUIRES_ARM_NEON_FMA;
32378 for (uint32_t n = 16; n <= 24; n += 8) {
32379 for (size_t k = 1; k <= 40; k += 9) {
32380 GemmMicrokernelTester()
32381 .mr(6)
32382 .nr(8)
32383 .kr(1)
32384 .sr(1)
32385 .m(6)
32386 .n(n)
32387 .k(k)
32388 .ks(3)
32389 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32390 }
32391 }
32392 }
32393
32394 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
32395 TEST_REQUIRES_ARM_NEON_FMA;
32396 for (size_t k = 1; k <= 40; k += 9) {
32397 for (uint32_t n = 1; n <= 8; n++) {
32398 for (uint32_t m = 1; m <= 6; m++) {
32399 GemmMicrokernelTester()
32400 .mr(6)
32401 .nr(8)
32402 .kr(1)
32403 .sr(1)
32404 .m(m)
32405 .n(n)
32406 .k(k)
32407 .cm_stride(11)
32408 .iterations(1)
32409 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32410 }
32411 }
32412 }
32413 }
32414
32415 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
32416 TEST_REQUIRES_ARM_NEON_FMA;
32417 for (size_t k = 1; k <= 40; k += 9) {
32418 GemmMicrokernelTester()
32419 .mr(6)
32420 .nr(8)
32421 .kr(1)
32422 .sr(1)
32423 .m(6)
32424 .n(8)
32425 .k(k)
32426 .ks(3)
32427 .a_offset(251)
32428 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32429 }
32430 }
32431
32432 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
32433 TEST_REQUIRES_ARM_NEON_FMA;
32434 for (size_t k = 1; k <= 40; k += 9) {
32435 for (uint32_t mz = 0; mz < 6; mz++) {
32436 GemmMicrokernelTester()
32437 .mr(6)
32438 .nr(8)
32439 .kr(1)
32440 .sr(1)
32441 .m(6)
32442 .n(8)
32443 .k(k)
32444 .ks(3)
32445 .a_offset(251)
32446 .zero_index(mz)
32447 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32448 }
32449 }
32450 }
32451
32452 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
32453 TEST_REQUIRES_ARM_NEON_FMA;
32454 GemmMicrokernelTester()
32455 .mr(6)
32456 .nr(8)
32457 .kr(1)
32458 .sr(1)
32459 .m(6)
32460 .n(8)
32461 .k(8)
32462 .qmin(128)
32463 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32464 }
32465
32466 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
32467 TEST_REQUIRES_ARM_NEON_FMA;
32468 GemmMicrokernelTester()
32469 .mr(6)
32470 .nr(8)
32471 .kr(1)
32472 .sr(1)
32473 .m(6)
32474 .n(8)
32475 .k(8)
32476 .qmax(128)
32477 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32478 }
32479
32480 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
32481 TEST_REQUIRES_ARM_NEON_FMA;
32482 GemmMicrokernelTester()
32483 .mr(6)
32484 .nr(8)
32485 .kr(1)
32486 .sr(1)
32487 .m(6)
32488 .n(8)
32489 .k(8)
32490 .cm_stride(11)
32491 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
32492 }
32493#endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
32494
32495
32496#if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
32497 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
32498 TEST_REQUIRES_ARM_NEON_FMA;
32499 GemmMicrokernelTester()
32500 .mr(6)
32501 .nr(8)
32502 .kr(1)
32503 .sr(1)
32504 .m(6)
32505 .n(8)
32506 .k(8)
32507 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32508 }
32509
32510 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
32511 TEST_REQUIRES_ARM_NEON_FMA;
32512 GemmMicrokernelTester()
32513 .mr(6)
32514 .nr(8)
32515 .kr(1)
32516 .sr(1)
32517 .m(6)
32518 .n(8)
32519 .k(8)
32520 .cn_stride(11)
32521 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32522 }
32523
32524 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
32525 TEST_REQUIRES_ARM_NEON_FMA;
32526 for (uint32_t n = 1; n <= 8; n++) {
32527 for (uint32_t m = 1; m <= 6; m++) {
32528 GemmMicrokernelTester()
32529 .mr(6)
32530 .nr(8)
32531 .kr(1)
32532 .sr(1)
32533 .m(m)
32534 .n(n)
32535 .k(8)
32536 .iterations(1)
32537 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32538 }
32539 }
32540 }
32541
32542 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
32543 TEST_REQUIRES_ARM_NEON_FMA;
32544 for (uint32_t m = 1; m <= 6; m++) {
32545 GemmMicrokernelTester()
32546 .mr(6)
32547 .nr(8)
32548 .kr(1)
32549 .sr(1)
32550 .m(m)
32551 .n(8)
32552 .k(8)
32553 .iterations(1)
32554 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32555 }
32556 }
32557
32558 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
32559 TEST_REQUIRES_ARM_NEON_FMA;
32560 for (uint32_t n = 1; n <= 8; n++) {
32561 GemmMicrokernelTester()
32562 .mr(6)
32563 .nr(8)
32564 .kr(1)
32565 .sr(1)
32566 .m(6)
32567 .n(n)
32568 .k(8)
32569 .iterations(1)
32570 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32571 }
32572 }
32573
32574 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
32575 TEST_REQUIRES_ARM_NEON_FMA;
32576 GemmMicrokernelTester()
32577 .mr(6)
32578 .nr(8)
32579 .kr(1)
32580 .sr(1)
32581 .m(6)
32582 .n(8)
32583 .k(16)
32584 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32585 }
32586
32587 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
32588 TEST_REQUIRES_ARM_NEON_FMA;
32589 for (uint32_t n = 1; n <= 8; n++) {
32590 for (uint32_t m = 1; m <= 6; m++) {
32591 GemmMicrokernelTester()
32592 .mr(6)
32593 .nr(8)
32594 .kr(1)
32595 .sr(1)
32596 .m(m)
32597 .n(n)
32598 .k(16)
32599 .iterations(1)
32600 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32601 }
32602 }
32603 }
32604
32605 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
32606 TEST_REQUIRES_ARM_NEON_FMA;
32607 for (size_t k = 1; k < 16; k++) {
32608 GemmMicrokernelTester()
32609 .mr(6)
32610 .nr(8)
32611 .kr(1)
32612 .sr(1)
32613 .m(6)
32614 .n(8)
32615 .k(k)
32616 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32617 }
32618 }
32619
32620 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
32621 TEST_REQUIRES_ARM_NEON_FMA;
32622 for (size_t k = 1; k < 16; k++) {
32623 for (uint32_t n = 1; n <= 8; n++) {
32624 for (uint32_t m = 1; m <= 6; m++) {
32625 GemmMicrokernelTester()
32626 .mr(6)
32627 .nr(8)
32628 .kr(1)
32629 .sr(1)
32630 .m(m)
32631 .n(n)
32632 .k(k)
32633 .iterations(1)
32634 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32635 }
32636 }
32637 }
32638 }
32639
32640 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
32641 TEST_REQUIRES_ARM_NEON_FMA;
32642 for (size_t k = 17; k < 32; k++) {
32643 GemmMicrokernelTester()
32644 .mr(6)
32645 .nr(8)
32646 .kr(1)
32647 .sr(1)
32648 .m(6)
32649 .n(8)
32650 .k(k)
32651 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32652 }
32653 }
32654
32655 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
32656 TEST_REQUIRES_ARM_NEON_FMA;
32657 for (size_t k = 17; k < 32; k++) {
32658 for (uint32_t n = 1; n <= 8; n++) {
32659 for (uint32_t m = 1; m <= 6; m++) {
32660 GemmMicrokernelTester()
32661 .mr(6)
32662 .nr(8)
32663 .kr(1)
32664 .sr(1)
32665 .m(m)
32666 .n(n)
32667 .k(k)
32668 .iterations(1)
32669 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32670 }
32671 }
32672 }
32673 }
32674
32675 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
32676 TEST_REQUIRES_ARM_NEON_FMA;
32677 for (size_t k = 24; k <= 80; k += 8) {
32678 GemmMicrokernelTester()
32679 .mr(6)
32680 .nr(8)
32681 .kr(1)
32682 .sr(1)
32683 .m(6)
32684 .n(8)
32685 .k(k)
32686 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32687 }
32688 }
32689
32690 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
32691 TEST_REQUIRES_ARM_NEON_FMA;
32692 for (size_t k = 24; k <= 80; k += 8) {
32693 for (uint32_t n = 1; n <= 8; n++) {
32694 for (uint32_t m = 1; m <= 6; m++) {
32695 GemmMicrokernelTester()
32696 .mr(6)
32697 .nr(8)
32698 .kr(1)
32699 .sr(1)
32700 .m(m)
32701 .n(n)
32702 .k(k)
32703 .iterations(1)
32704 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32705 }
32706 }
32707 }
32708 }
32709
32710 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
32711 TEST_REQUIRES_ARM_NEON_FMA;
32712 for (uint32_t n = 9; n < 16; n++) {
32713 for (size_t k = 1; k <= 40; k += 9) {
32714 GemmMicrokernelTester()
32715 .mr(6)
32716 .nr(8)
32717 .kr(1)
32718 .sr(1)
32719 .m(6)
32720 .n(n)
32721 .k(k)
32722 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32723 }
32724 }
32725 }
32726
32727 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
32728 TEST_REQUIRES_ARM_NEON_FMA;
32729 for (uint32_t n = 9; n < 16; n++) {
32730 for (size_t k = 1; k <= 40; k += 9) {
32731 GemmMicrokernelTester()
32732 .mr(6)
32733 .nr(8)
32734 .kr(1)
32735 .sr(1)
32736 .m(6)
32737 .n(n)
32738 .k(k)
32739 .cn_stride(11)
32740 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32741 }
32742 }
32743 }
32744
32745 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
32746 TEST_REQUIRES_ARM_NEON_FMA;
32747 for (uint32_t n = 9; n < 16; n++) {
32748 for (size_t k = 1; k <= 40; k += 9) {
32749 for (uint32_t m = 1; m <= 6; m++) {
32750 GemmMicrokernelTester()
32751 .mr(6)
32752 .nr(8)
32753 .kr(1)
32754 .sr(1)
32755 .m(m)
32756 .n(n)
32757 .k(k)
32758 .iterations(1)
32759 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32760 }
32761 }
32762 }
32763 }
32764
32765 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
32766 TEST_REQUIRES_ARM_NEON_FMA;
32767 for (uint32_t n = 16; n <= 24; n += 8) {
32768 for (size_t k = 1; k <= 40; k += 9) {
32769 GemmMicrokernelTester()
32770 .mr(6)
32771 .nr(8)
32772 .kr(1)
32773 .sr(1)
32774 .m(6)
32775 .n(n)
32776 .k(k)
32777 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32778 }
32779 }
32780 }
32781
32782 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
32783 TEST_REQUIRES_ARM_NEON_FMA;
32784 for (uint32_t n = 16; n <= 24; n += 8) {
32785 for (size_t k = 1; k <= 40; k += 9) {
32786 GemmMicrokernelTester()
32787 .mr(6)
32788 .nr(8)
32789 .kr(1)
32790 .sr(1)
32791 .m(6)
32792 .n(n)
32793 .k(k)
32794 .cn_stride(11)
32795 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32796 }
32797 }
32798 }
32799
32800 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
32801 TEST_REQUIRES_ARM_NEON_FMA;
32802 for (uint32_t n = 16; n <= 24; n += 8) {
32803 for (size_t k = 1; k <= 40; k += 9) {
32804 for (uint32_t m = 1; m <= 6; m++) {
32805 GemmMicrokernelTester()
32806 .mr(6)
32807 .nr(8)
32808 .kr(1)
32809 .sr(1)
32810 .m(m)
32811 .n(n)
32812 .k(k)
32813 .iterations(1)
32814 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32815 }
32816 }
32817 }
32818 }
32819
32820 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
32821 TEST_REQUIRES_ARM_NEON_FMA;
32822 for (size_t k = 1; k <= 40; k += 9) {
32823 GemmMicrokernelTester()
32824 .mr(6)
32825 .nr(8)
32826 .kr(1)
32827 .sr(1)
32828 .m(6)
32829 .n(8)
32830 .k(k)
32831 .ks(3)
32832 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32833 }
32834 }
32835
32836 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
32837 TEST_REQUIRES_ARM_NEON_FMA;
32838 for (size_t k = 1; k <= 40; k += 9) {
32839 for (uint32_t n = 1; n <= 8; n++) {
32840 for (uint32_t m = 1; m <= 6; m++) {
32841 GemmMicrokernelTester()
32842 .mr(6)
32843 .nr(8)
32844 .kr(1)
32845 .sr(1)
32846 .m(m)
32847 .n(n)
32848 .k(k)
32849 .ks(3)
32850 .iterations(1)
32851 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32852 }
32853 }
32854 }
32855 }
32856
32857 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
32858 TEST_REQUIRES_ARM_NEON_FMA;
32859 for (uint32_t n = 9; n < 16; n++) {
32860 for (size_t k = 1; k <= 40; k += 9) {
32861 GemmMicrokernelTester()
32862 .mr(6)
32863 .nr(8)
32864 .kr(1)
32865 .sr(1)
32866 .m(6)
32867 .n(n)
32868 .k(k)
32869 .ks(3)
32870 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32871 }
32872 }
32873 }
32874
32875 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
32876 TEST_REQUIRES_ARM_NEON_FMA;
32877 for (uint32_t n = 16; n <= 24; n += 8) {
32878 for (size_t k = 1; k <= 40; k += 9) {
32879 GemmMicrokernelTester()
32880 .mr(6)
32881 .nr(8)
32882 .kr(1)
32883 .sr(1)
32884 .m(6)
32885 .n(n)
32886 .k(k)
32887 .ks(3)
32888 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32889 }
32890 }
32891 }
32892
32893 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
32894 TEST_REQUIRES_ARM_NEON_FMA;
32895 for (size_t k = 1; k <= 40; k += 9) {
32896 for (uint32_t n = 1; n <= 8; n++) {
32897 for (uint32_t m = 1; m <= 6; m++) {
32898 GemmMicrokernelTester()
32899 .mr(6)
32900 .nr(8)
32901 .kr(1)
32902 .sr(1)
32903 .m(m)
32904 .n(n)
32905 .k(k)
32906 .cm_stride(11)
32907 .iterations(1)
32908 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32909 }
32910 }
32911 }
32912 }
32913
32914 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
32915 TEST_REQUIRES_ARM_NEON_FMA;
32916 for (size_t k = 1; k <= 40; k += 9) {
32917 GemmMicrokernelTester()
32918 .mr(6)
32919 .nr(8)
32920 .kr(1)
32921 .sr(1)
32922 .m(6)
32923 .n(8)
32924 .k(k)
32925 .ks(3)
32926 .a_offset(251)
32927 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32928 }
32929 }
32930
32931 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
32932 TEST_REQUIRES_ARM_NEON_FMA;
32933 for (size_t k = 1; k <= 40; k += 9) {
32934 for (uint32_t mz = 0; mz < 6; mz++) {
32935 GemmMicrokernelTester()
32936 .mr(6)
32937 .nr(8)
32938 .kr(1)
32939 .sr(1)
32940 .m(6)
32941 .n(8)
32942 .k(k)
32943 .ks(3)
32944 .a_offset(251)
32945 .zero_index(mz)
32946 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32947 }
32948 }
32949 }
32950
32951 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
32952 TEST_REQUIRES_ARM_NEON_FMA;
32953 GemmMicrokernelTester()
32954 .mr(6)
32955 .nr(8)
32956 .kr(1)
32957 .sr(1)
32958 .m(6)
32959 .n(8)
32960 .k(8)
32961 .qmin(128)
32962 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32963 }
32964
32965 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
32966 TEST_REQUIRES_ARM_NEON_FMA;
32967 GemmMicrokernelTester()
32968 .mr(6)
32969 .nr(8)
32970 .kr(1)
32971 .sr(1)
32972 .m(6)
32973 .n(8)
32974 .k(8)
32975 .qmax(128)
32976 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32977 }
32978
32979 TEST(GENERATE_F32_IGEMM_6X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
32980 TEST_REQUIRES_ARM_NEON_FMA;
32981 GemmMicrokernelTester()
32982 .mr(6)
32983 .nr(8)
32984 .kr(1)
32985 .sr(1)
32986 .m(6)
32987 .n(8)
32988 .k(8)
32989 .cm_stride(11)
32990 .Test(xnn_generate_f32_igemm_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
32991 }
32992#endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT