blob: e76b5f5db34786c1f60ffb0c4ab5ce82f3e9f25c [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-igemm-minmax.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/allocator.h>
17#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
26#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
27 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4) {
28 TEST_REQUIRES_ARM_NEON_FMA;
29 GemmMicrokernelTester()
30 .mr(4)
31 .nr(8)
32 .kr(1)
33 .sr(1)
34 .m(4)
35 .n(8)
36 .k(4)
37 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
38 }
39
40 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cn) {
41 TEST_REQUIRES_ARM_NEON_FMA;
42 GemmMicrokernelTester()
43 .mr(4)
44 .nr(8)
45 .kr(1)
46 .sr(1)
47 .m(4)
48 .n(8)
49 .k(4)
50 .cn_stride(11)
51 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
52 }
53
54 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile) {
55 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080056 for (uint32_t n = 1; n <= 8; n++) {
57 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080058 GemmMicrokernelTester()
59 .mr(4)
60 .nr(8)
61 .kr(1)
62 .sr(1)
63 .m(m)
64 .n(n)
65 .k(4)
66 .iterations(1)
67 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
68 }
69 }
70 }
71
72 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_m) {
73 TEST_REQUIRES_ARM_NEON_FMA;
74 for (uint32_t m = 1; m <= 4; m++) {
75 GemmMicrokernelTester()
76 .mr(4)
77 .nr(8)
78 .kr(1)
79 .sr(1)
80 .m(m)
81 .n(8)
82 .k(4)
83 .iterations(1)
84 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
85 }
86 }
87
88 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_eq_4_subtile_n) {
89 TEST_REQUIRES_ARM_NEON_FMA;
90 for (uint32_t n = 1; n <= 8; n++) {
91 GemmMicrokernelTester()
92 .mr(4)
93 .nr(8)
94 .kr(1)
95 .sr(1)
96 .m(4)
97 .n(n)
98 .k(4)
99 .iterations(1)
100 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
101 }
102 }
103
104 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_lt_4) {
105 TEST_REQUIRES_ARM_NEON_FMA;
106 for (size_t k = 1; k < 4; k++) {
107 GemmMicrokernelTester()
108 .mr(4)
109 .nr(8)
110 .kr(1)
111 .sr(1)
112 .m(4)
113 .n(8)
114 .k(k)
115 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
116 }
117 }
118
119 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_lt_4_subtile) {
120 TEST_REQUIRES_ARM_NEON_FMA;
121 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800122 for (uint32_t n = 1; n <= 8; n++) {
123 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800124 GemmMicrokernelTester()
125 .mr(4)
126 .nr(8)
127 .kr(1)
128 .sr(1)
129 .m(m)
130 .n(n)
131 .k(k)
132 .iterations(1)
133 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
134 }
135 }
136 }
137 }
138
139 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_gt_4) {
140 TEST_REQUIRES_ARM_NEON_FMA;
141 for (size_t k = 5; k < 8; k++) {
142 GemmMicrokernelTester()
143 .mr(4)
144 .nr(8)
145 .kr(1)
146 .sr(1)
147 .m(4)
148 .n(8)
149 .k(k)
150 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
151 }
152 }
153
154 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_gt_4_subtile) {
155 TEST_REQUIRES_ARM_NEON_FMA;
156 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800157 for (uint32_t n = 1; n <= 8; n++) {
158 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800159 GemmMicrokernelTester()
160 .mr(4)
161 .nr(8)
162 .kr(1)
163 .sr(1)
164 .m(m)
165 .n(n)
166 .k(k)
167 .iterations(1)
168 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
169 }
170 }
171 }
172 }
173
174 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_div_4) {
175 TEST_REQUIRES_ARM_NEON_FMA;
176 for (size_t k = 8; k <= 40; k += 4) {
177 GemmMicrokernelTester()
178 .mr(4)
179 .nr(8)
180 .kr(1)
181 .sr(1)
182 .m(4)
183 .n(8)
184 .k(k)
185 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
186 }
187 }
188
189 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, k_div_4_subtile) {
190 TEST_REQUIRES_ARM_NEON_FMA;
191 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800192 for (uint32_t n = 1; n <= 8; n++) {
193 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800194 GemmMicrokernelTester()
195 .mr(4)
196 .nr(8)
197 .kr(1)
198 .sr(1)
199 .m(m)
200 .n(n)
201 .k(k)
202 .iterations(1)
203 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
204 }
205 }
206 }
207 }
208
209 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8) {
210 TEST_REQUIRES_ARM_NEON_FMA;
211 for (uint32_t n = 9; n < 16; n++) {
212 for (size_t k = 1; k <= 20; k += 5) {
213 GemmMicrokernelTester()
214 .mr(4)
215 .nr(8)
216 .kr(1)
217 .sr(1)
218 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800219 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800220 .k(k)
221 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
222 }
223 }
224 }
225
226 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_strided_cn) {
227 TEST_REQUIRES_ARM_NEON_FMA;
228 for (uint32_t n = 9; n < 16; n++) {
229 for (size_t k = 1; k <= 20; k += 5) {
230 GemmMicrokernelTester()
231 .mr(4)
232 .nr(8)
233 .kr(1)
234 .sr(1)
235 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800236 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800237 .k(k)
238 .cn_stride(11)
239 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
240 }
241 }
242 }
243
244 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_subtile) {
245 TEST_REQUIRES_ARM_NEON_FMA;
246 for (uint32_t n = 9; n < 16; n++) {
247 for (size_t k = 1; k <= 20; k += 5) {
248 for (uint32_t m = 1; m <= 4; m++) {
249 GemmMicrokernelTester()
250 .mr(4)
251 .nr(8)
252 .kr(1)
253 .sr(1)
254 .m(m)
255 .n(n)
256 .k(k)
257 .iterations(1)
258 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
259 }
260 }
261 }
262 }
263
264 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8) {
265 TEST_REQUIRES_ARM_NEON_FMA;
266 for (uint32_t n = 16; n <= 24; n += 8) {
267 for (size_t k = 1; k <= 20; k += 5) {
268 GemmMicrokernelTester()
269 .mr(4)
270 .nr(8)
271 .kr(1)
272 .sr(1)
273 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800274 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800275 .k(k)
276 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
277 }
278 }
279 }
280
281 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_strided_cn) {
282 TEST_REQUIRES_ARM_NEON_FMA;
283 for (uint32_t n = 16; n <= 24; n += 8) {
284 for (size_t k = 1; k <= 20; k += 5) {
285 GemmMicrokernelTester()
286 .mr(4)
287 .nr(8)
288 .kr(1)
289 .sr(1)
290 .m(4)
291 .n(n)
292 .k(k)
293 .cn_stride(11)
294 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
295 }
296 }
297 }
298
299 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_subtile) {
300 TEST_REQUIRES_ARM_NEON_FMA;
301 for (uint32_t n = 16; n <= 24; n += 8) {
302 for (size_t k = 1; k <= 20; k += 5) {
303 for (uint32_t m = 1; m <= 4; m++) {
304 GemmMicrokernelTester()
305 .mr(4)
306 .nr(8)
307 .kr(1)
308 .sr(1)
309 .m(m)
310 .n(n)
311 .k(k)
312 .iterations(1)
313 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
314 }
315 }
316 }
317 }
318
319 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, small_kernel) {
320 TEST_REQUIRES_ARM_NEON_FMA;
321 for (size_t k = 1; k <= 20; k += 5) {
322 GemmMicrokernelTester()
323 .mr(4)
324 .nr(8)
325 .kr(1)
326 .sr(1)
327 .m(4)
328 .n(8)
329 .k(k)
330 .ks(3)
331 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
332 }
333 }
334
335 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, small_kernel_subtile) {
336 TEST_REQUIRES_ARM_NEON_FMA;
337 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800338 for (uint32_t n = 1; n <= 8; n++) {
339 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800340 GemmMicrokernelTester()
341 .mr(4)
342 .nr(8)
343 .kr(1)
344 .sr(1)
345 .m(m)
346 .n(n)
347 .k(k)
348 .ks(3)
349 .iterations(1)
350 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
351 }
352 }
353 }
354 }
355
356 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_gt_8_small_kernel) {
357 TEST_REQUIRES_ARM_NEON_FMA;
358 for (uint32_t n = 9; n < 16; n++) {
359 for (size_t k = 1; k <= 20; k += 5) {
360 GemmMicrokernelTester()
361 .mr(4)
362 .nr(8)
363 .kr(1)
364 .sr(1)
365 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800366 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800367 .k(k)
368 .ks(3)
369 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
370 }
371 }
372 }
373
374 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, n_div_8_small_kernel) {
375 TEST_REQUIRES_ARM_NEON_FMA;
376 for (uint32_t n = 16; n <= 24; n += 8) {
377 for (size_t k = 1; k <= 20; k += 5) {
378 GemmMicrokernelTester()
379 .mr(4)
380 .nr(8)
381 .kr(1)
382 .sr(1)
383 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800384 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800385 .k(k)
386 .ks(3)
387 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
388 }
389 }
390 }
391
392 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cm_subtile) {
393 TEST_REQUIRES_ARM_NEON_FMA;
394 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800395 for (uint32_t n = 1; n <= 8; n++) {
396 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800397 GemmMicrokernelTester()
398 .mr(4)
399 .nr(8)
400 .kr(1)
401 .sr(1)
402 .m(m)
403 .n(n)
404 .k(k)
405 .cm_stride(11)
406 .iterations(1)
407 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
408 }
409 }
410 }
411 }
412
413 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, a_offset) {
414 TEST_REQUIRES_ARM_NEON_FMA;
415 for (size_t k = 1; k <= 20; k += 5) {
416 GemmMicrokernelTester()
417 .mr(4)
418 .nr(8)
419 .kr(1)
420 .sr(1)
421 .m(4)
422 .n(8)
423 .k(k)
424 .ks(3)
425 .a_offset(83)
426 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
427 }
428 }
429
430 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, zero) {
431 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800432 for (size_t k = 1; k <= 20; k += 5) {
433 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800434 GemmMicrokernelTester()
435 .mr(4)
436 .nr(8)
437 .kr(1)
438 .sr(1)
439 .m(4)
440 .n(8)
441 .k(k)
442 .ks(3)
443 .a_offset(83)
444 .zero_index(mz)
445 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
446 }
447 }
448 }
449
450 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, qmin) {
451 TEST_REQUIRES_ARM_NEON_FMA;
452 GemmMicrokernelTester()
453 .mr(4)
454 .nr(8)
455 .kr(1)
456 .sr(1)
457 .m(4)
458 .n(8)
459 .k(4)
460 .qmin(128)
461 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
462 }
463
464 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, qmax) {
465 TEST_REQUIRES_ARM_NEON_FMA;
466 GemmMicrokernelTester()
467 .mr(4)
468 .nr(8)
469 .kr(1)
470 .sr(1)
471 .m(4)
472 .n(8)
473 .k(4)
474 .qmax(128)
475 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
476 }
477
478 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD128, strided_cm) {
479 TEST_REQUIRES_ARM_NEON_FMA;
480 GemmMicrokernelTester()
481 .mr(4)
482 .nr(8)
483 .kr(1)
484 .sr(1)
485 .m(4)
486 .n(8)
487 .k(4)
488 .cm_stride(11)
489 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld128, xnn_init_f32_minmax_scalar_params);
490 }
491#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
492
493
494#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
495 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2) {
496 TEST_REQUIRES_ARM_NEON_FMA;
497 GemmMicrokernelTester()
498 .mr(4)
499 .nr(8)
500 .kr(1)
501 .sr(1)
502 .m(4)
503 .n(8)
504 .k(2)
505 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
506 }
507
508 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cn) {
509 TEST_REQUIRES_ARM_NEON_FMA;
510 GemmMicrokernelTester()
511 .mr(4)
512 .nr(8)
513 .kr(1)
514 .sr(1)
515 .m(4)
516 .n(8)
517 .k(2)
518 .cn_stride(11)
519 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
520 }
521
522 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile) {
523 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800524 for (uint32_t n = 1; n <= 8; n++) {
525 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800526 GemmMicrokernelTester()
527 .mr(4)
528 .nr(8)
529 .kr(1)
530 .sr(1)
531 .m(m)
532 .n(n)
533 .k(2)
534 .iterations(1)
535 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
536 }
537 }
538 }
539
540 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_m) {
541 TEST_REQUIRES_ARM_NEON_FMA;
542 for (uint32_t m = 1; m <= 4; m++) {
543 GemmMicrokernelTester()
544 .mr(4)
545 .nr(8)
546 .kr(1)
547 .sr(1)
548 .m(m)
549 .n(8)
550 .k(2)
551 .iterations(1)
552 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
553 }
554 }
555
556 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_eq_2_subtile_n) {
557 TEST_REQUIRES_ARM_NEON_FMA;
558 for (uint32_t n = 1; n <= 8; n++) {
559 GemmMicrokernelTester()
560 .mr(4)
561 .nr(8)
562 .kr(1)
563 .sr(1)
564 .m(4)
565 .n(n)
566 .k(2)
567 .iterations(1)
568 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
569 }
570 }
571
572 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_lt_2) {
573 TEST_REQUIRES_ARM_NEON_FMA;
574 for (size_t k = 1; k < 2; k++) {
575 GemmMicrokernelTester()
576 .mr(4)
577 .nr(8)
578 .kr(1)
579 .sr(1)
580 .m(4)
581 .n(8)
582 .k(k)
583 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
584 }
585 }
586
587 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_lt_2_subtile) {
588 TEST_REQUIRES_ARM_NEON_FMA;
589 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800590 for (uint32_t n = 1; n <= 8; n++) {
591 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800592 GemmMicrokernelTester()
593 .mr(4)
594 .nr(8)
595 .kr(1)
596 .sr(1)
597 .m(m)
598 .n(n)
599 .k(k)
600 .iterations(1)
601 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
602 }
603 }
604 }
605 }
606
607 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_gt_2) {
608 TEST_REQUIRES_ARM_NEON_FMA;
609 for (size_t k = 3; k < 4; k++) {
610 GemmMicrokernelTester()
611 .mr(4)
612 .nr(8)
613 .kr(1)
614 .sr(1)
615 .m(4)
616 .n(8)
617 .k(k)
618 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
619 }
620 }
621
622 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_gt_2_subtile) {
623 TEST_REQUIRES_ARM_NEON_FMA;
624 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800625 for (uint32_t n = 1; n <= 8; n++) {
626 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800627 GemmMicrokernelTester()
628 .mr(4)
629 .nr(8)
630 .kr(1)
631 .sr(1)
632 .m(m)
633 .n(n)
634 .k(k)
635 .iterations(1)
636 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
637 }
638 }
639 }
640 }
641
642 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_div_2) {
643 TEST_REQUIRES_ARM_NEON_FMA;
644 for (size_t k = 4; k <= 20; k += 2) {
645 GemmMicrokernelTester()
646 .mr(4)
647 .nr(8)
648 .kr(1)
649 .sr(1)
650 .m(4)
651 .n(8)
652 .k(k)
653 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
654 }
655 }
656
657 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, k_div_2_subtile) {
658 TEST_REQUIRES_ARM_NEON_FMA;
659 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800660 for (uint32_t n = 1; n <= 8; n++) {
661 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800662 GemmMicrokernelTester()
663 .mr(4)
664 .nr(8)
665 .kr(1)
666 .sr(1)
667 .m(m)
668 .n(n)
669 .k(k)
670 .iterations(1)
671 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
672 }
673 }
674 }
675 }
676
677 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8) {
678 TEST_REQUIRES_ARM_NEON_FMA;
679 for (uint32_t n = 9; n < 16; n++) {
680 for (size_t k = 1; k <= 10; k += 3) {
681 GemmMicrokernelTester()
682 .mr(4)
683 .nr(8)
684 .kr(1)
685 .sr(1)
686 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800687 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800688 .k(k)
689 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
690 }
691 }
692 }
693
694 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_strided_cn) {
695 TEST_REQUIRES_ARM_NEON_FMA;
696 for (uint32_t n = 9; n < 16; n++) {
697 for (size_t k = 1; k <= 10; k += 3) {
698 GemmMicrokernelTester()
699 .mr(4)
700 .nr(8)
701 .kr(1)
702 .sr(1)
703 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800704 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800705 .k(k)
706 .cn_stride(11)
707 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
708 }
709 }
710 }
711
712 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_subtile) {
713 TEST_REQUIRES_ARM_NEON_FMA;
714 for (uint32_t n = 9; n < 16; n++) {
715 for (size_t k = 1; k <= 10; k += 3) {
716 for (uint32_t m = 1; m <= 4; m++) {
717 GemmMicrokernelTester()
718 .mr(4)
719 .nr(8)
720 .kr(1)
721 .sr(1)
722 .m(m)
723 .n(n)
724 .k(k)
725 .iterations(1)
726 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
727 }
728 }
729 }
730 }
731
732 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8) {
733 TEST_REQUIRES_ARM_NEON_FMA;
734 for (uint32_t n = 16; n <= 24; n += 8) {
735 for (size_t k = 1; k <= 10; k += 3) {
736 GemmMicrokernelTester()
737 .mr(4)
738 .nr(8)
739 .kr(1)
740 .sr(1)
741 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800742 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800743 .k(k)
744 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
745 }
746 }
747 }
748
749 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_strided_cn) {
750 TEST_REQUIRES_ARM_NEON_FMA;
751 for (uint32_t n = 16; n <= 24; n += 8) {
752 for (size_t k = 1; k <= 10; k += 3) {
753 GemmMicrokernelTester()
754 .mr(4)
755 .nr(8)
756 .kr(1)
757 .sr(1)
758 .m(4)
759 .n(n)
760 .k(k)
761 .cn_stride(11)
762 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
763 }
764 }
765 }
766
767 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_subtile) {
768 TEST_REQUIRES_ARM_NEON_FMA;
769 for (uint32_t n = 16; n <= 24; n += 8) {
770 for (size_t k = 1; k <= 10; k += 3) {
771 for (uint32_t m = 1; m <= 4; m++) {
772 GemmMicrokernelTester()
773 .mr(4)
774 .nr(8)
775 .kr(1)
776 .sr(1)
777 .m(m)
778 .n(n)
779 .k(k)
780 .iterations(1)
781 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
782 }
783 }
784 }
785 }
786
787 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, small_kernel) {
788 TEST_REQUIRES_ARM_NEON_FMA;
789 for (size_t k = 1; k <= 10; k += 3) {
790 GemmMicrokernelTester()
791 .mr(4)
792 .nr(8)
793 .kr(1)
794 .sr(1)
795 .m(4)
796 .n(8)
797 .k(k)
798 .ks(3)
799 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
800 }
801 }
802
803 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, small_kernel_subtile) {
804 TEST_REQUIRES_ARM_NEON_FMA;
805 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800806 for (uint32_t n = 1; n <= 8; n++) {
807 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800808 GemmMicrokernelTester()
809 .mr(4)
810 .nr(8)
811 .kr(1)
812 .sr(1)
813 .m(m)
814 .n(n)
815 .k(k)
816 .ks(3)
817 .iterations(1)
818 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
819 }
820 }
821 }
822 }
823
824 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_gt_8_small_kernel) {
825 TEST_REQUIRES_ARM_NEON_FMA;
826 for (uint32_t n = 9; n < 16; n++) {
827 for (size_t k = 1; k <= 10; k += 3) {
828 GemmMicrokernelTester()
829 .mr(4)
830 .nr(8)
831 .kr(1)
832 .sr(1)
833 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800834 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800835 .k(k)
836 .ks(3)
837 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
838 }
839 }
840 }
841
842 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, n_div_8_small_kernel) {
843 TEST_REQUIRES_ARM_NEON_FMA;
844 for (uint32_t n = 16; n <= 24; n += 8) {
845 for (size_t k = 1; k <= 10; k += 3) {
846 GemmMicrokernelTester()
847 .mr(4)
848 .nr(8)
849 .kr(1)
850 .sr(1)
851 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800852 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800853 .k(k)
854 .ks(3)
855 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
856 }
857 }
858 }
859
860 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cm_subtile) {
861 TEST_REQUIRES_ARM_NEON_FMA;
862 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800863 for (uint32_t n = 1; n <= 8; n++) {
864 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800865 GemmMicrokernelTester()
866 .mr(4)
867 .nr(8)
868 .kr(1)
869 .sr(1)
870 .m(m)
871 .n(n)
872 .k(k)
873 .cm_stride(11)
874 .iterations(1)
875 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
876 }
877 }
878 }
879 }
880
881 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, a_offset) {
882 TEST_REQUIRES_ARM_NEON_FMA;
883 for (size_t k = 1; k <= 10; k += 3) {
884 GemmMicrokernelTester()
885 .mr(4)
886 .nr(8)
887 .kr(1)
888 .sr(1)
889 .m(4)
890 .n(8)
891 .k(k)
892 .ks(3)
893 .a_offset(43)
894 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
895 }
896 }
897
898 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, zero) {
899 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800900 for (size_t k = 1; k <= 10; k += 3) {
901 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800902 GemmMicrokernelTester()
903 .mr(4)
904 .nr(8)
905 .kr(1)
906 .sr(1)
907 .m(4)
908 .n(8)
909 .k(k)
910 .ks(3)
911 .a_offset(43)
912 .zero_index(mz)
913 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
914 }
915 }
916 }
917
918 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, qmin) {
919 TEST_REQUIRES_ARM_NEON_FMA;
920 GemmMicrokernelTester()
921 .mr(4)
922 .nr(8)
923 .kr(1)
924 .sr(1)
925 .m(4)
926 .n(8)
927 .k(2)
928 .qmin(128)
929 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
930 }
931
932 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, qmax) {
933 TEST_REQUIRES_ARM_NEON_FMA;
934 GemmMicrokernelTester()
935 .mr(4)
936 .nr(8)
937 .kr(1)
938 .sr(1)
939 .m(4)
940 .n(8)
941 .k(2)
942 .qmax(128)
943 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
944 }
945
946 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_LD64, strided_cm) {
947 TEST_REQUIRES_ARM_NEON_FMA;
948 GemmMicrokernelTester()
949 .mr(4)
950 .nr(8)
951 .kr(1)
952 .sr(1)
953 .m(4)
954 .n(8)
955 .k(2)
956 .cm_stride(11)
957 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64, xnn_init_f32_minmax_scalar_params);
958 }
959#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
960
961
962#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
963 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
964 TEST_REQUIRES_ARM_NEON_FMA;
965 GemmMicrokernelTester()
966 .mr(1)
967 .nr(8)
968 .kr(1)
969 .sr(1)
970 .m(1)
971 .n(8)
972 .k(8)
973 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
974 }
975
976 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
977 TEST_REQUIRES_ARM_NEON_FMA;
978 GemmMicrokernelTester()
979 .mr(1)
980 .nr(8)
981 .kr(1)
982 .sr(1)
983 .m(1)
984 .n(8)
985 .k(8)
986 .cn_stride(11)
987 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
988 }
989
990 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
991 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800992 for (uint32_t n = 1; n <= 8; n++) {
993 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800994 GemmMicrokernelTester()
995 .mr(1)
996 .nr(8)
997 .kr(1)
998 .sr(1)
999 .m(m)
1000 .n(n)
1001 .k(8)
1002 .iterations(1)
1003 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1004 }
1005 }
1006 }
1007
1008 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile_m) {
1009 TEST_REQUIRES_ARM_NEON_FMA;
1010 for (uint32_t m = 1; m <= 1; m++) {
1011 GemmMicrokernelTester()
1012 .mr(1)
1013 .nr(8)
1014 .kr(1)
1015 .sr(1)
1016 .m(m)
1017 .n(8)
1018 .k(8)
1019 .iterations(1)
1020 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1021 }
1022 }
1023
1024 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile_n) {
1025 TEST_REQUIRES_ARM_NEON_FMA;
1026 for (uint32_t n = 1; n <= 8; n++) {
1027 GemmMicrokernelTester()
1028 .mr(1)
1029 .nr(8)
1030 .kr(1)
1031 .sr(1)
1032 .m(1)
1033 .n(n)
1034 .k(8)
1035 .iterations(1)
1036 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1037 }
1038 }
1039
1040 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16) {
1041 TEST_REQUIRES_ARM_NEON_FMA;
1042 GemmMicrokernelTester()
1043 .mr(1)
1044 .nr(8)
1045 .kr(1)
1046 .sr(1)
1047 .m(1)
1048 .n(8)
1049 .k(16)
1050 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1051 }
1052
1053 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_eq_16_subtile) {
1054 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001055 for (uint32_t n = 1; n <= 8; n++) {
1056 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001057 GemmMicrokernelTester()
1058 .mr(1)
1059 .nr(8)
1060 .kr(1)
1061 .sr(1)
1062 .m(m)
1063 .n(n)
1064 .k(16)
1065 .iterations(1)
1066 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1067 }
1068 }
1069 }
1070
1071 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16) {
1072 TEST_REQUIRES_ARM_NEON_FMA;
1073 for (size_t k = 1; k < 16; k++) {
1074 GemmMicrokernelTester()
1075 .mr(1)
1076 .nr(8)
1077 .kr(1)
1078 .sr(1)
1079 .m(1)
1080 .n(8)
1081 .k(k)
1082 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1083 }
1084 }
1085
1086 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_lt_16_subtile) {
1087 TEST_REQUIRES_ARM_NEON_FMA;
1088 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001089 for (uint32_t n = 1; n <= 8; n++) {
1090 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001091 GemmMicrokernelTester()
1092 .mr(1)
1093 .nr(8)
1094 .kr(1)
1095 .sr(1)
1096 .m(m)
1097 .n(n)
1098 .k(k)
1099 .iterations(1)
1100 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1101 }
1102 }
1103 }
1104 }
1105
1106 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16) {
1107 TEST_REQUIRES_ARM_NEON_FMA;
1108 for (size_t k = 17; k < 32; k++) {
1109 GemmMicrokernelTester()
1110 .mr(1)
1111 .nr(8)
1112 .kr(1)
1113 .sr(1)
1114 .m(1)
1115 .n(8)
1116 .k(k)
1117 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1118 }
1119 }
1120
1121 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_gt_16_subtile) {
1122 TEST_REQUIRES_ARM_NEON_FMA;
1123 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001124 for (uint32_t n = 1; n <= 8; n++) {
1125 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001126 GemmMicrokernelTester()
1127 .mr(1)
1128 .nr(8)
1129 .kr(1)
1130 .sr(1)
1131 .m(m)
1132 .n(n)
1133 .k(k)
1134 .iterations(1)
1135 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1136 }
1137 }
1138 }
1139 }
1140
1141 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8) {
1142 TEST_REQUIRES_ARM_NEON_FMA;
1143 for (size_t k = 24; k <= 80; k += 8) {
1144 GemmMicrokernelTester()
1145 .mr(1)
1146 .nr(8)
1147 .kr(1)
1148 .sr(1)
1149 .m(1)
1150 .n(8)
1151 .k(k)
1152 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1153 }
1154 }
1155
1156 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, k_div_8_subtile) {
1157 TEST_REQUIRES_ARM_NEON_FMA;
1158 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001159 for (uint32_t n = 1; n <= 8; n++) {
1160 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001161 GemmMicrokernelTester()
1162 .mr(1)
1163 .nr(8)
1164 .kr(1)
1165 .sr(1)
1166 .m(m)
1167 .n(n)
1168 .k(k)
1169 .iterations(1)
1170 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1171 }
1172 }
1173 }
1174 }
1175
1176 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8) {
1177 TEST_REQUIRES_ARM_NEON_FMA;
1178 for (uint32_t n = 9; n < 16; n++) {
1179 for (size_t k = 1; k <= 40; k += 9) {
1180 GemmMicrokernelTester()
1181 .mr(1)
1182 .nr(8)
1183 .kr(1)
1184 .sr(1)
1185 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001186 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001187 .k(k)
1188 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1189 }
1190 }
1191 }
1192
1193 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_strided_cn) {
1194 TEST_REQUIRES_ARM_NEON_FMA;
1195 for (uint32_t n = 9; n < 16; n++) {
1196 for (size_t k = 1; k <= 40; k += 9) {
1197 GemmMicrokernelTester()
1198 .mr(1)
1199 .nr(8)
1200 .kr(1)
1201 .sr(1)
1202 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001203 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001204 .k(k)
1205 .cn_stride(11)
1206 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1207 }
1208 }
1209 }
1210
1211 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_subtile) {
1212 TEST_REQUIRES_ARM_NEON_FMA;
1213 for (uint32_t n = 9; n < 16; n++) {
1214 for (size_t k = 1; k <= 40; k += 9) {
1215 for (uint32_t m = 1; m <= 1; m++) {
1216 GemmMicrokernelTester()
1217 .mr(1)
1218 .nr(8)
1219 .kr(1)
1220 .sr(1)
1221 .m(m)
1222 .n(n)
1223 .k(k)
1224 .iterations(1)
1225 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1226 }
1227 }
1228 }
1229 }
1230
1231 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8) {
1232 TEST_REQUIRES_ARM_NEON_FMA;
1233 for (uint32_t n = 16; n <= 24; n += 8) {
1234 for (size_t k = 1; k <= 40; k += 9) {
1235 GemmMicrokernelTester()
1236 .mr(1)
1237 .nr(8)
1238 .kr(1)
1239 .sr(1)
1240 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001241 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001242 .k(k)
1243 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1244 }
1245 }
1246 }
1247
1248 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_strided_cn) {
1249 TEST_REQUIRES_ARM_NEON_FMA;
1250 for (uint32_t n = 16; n <= 24; n += 8) {
1251 for (size_t k = 1; k <= 40; k += 9) {
1252 GemmMicrokernelTester()
1253 .mr(1)
1254 .nr(8)
1255 .kr(1)
1256 .sr(1)
1257 .m(1)
1258 .n(n)
1259 .k(k)
1260 .cn_stride(11)
1261 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1262 }
1263 }
1264 }
1265
1266 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_subtile) {
1267 TEST_REQUIRES_ARM_NEON_FMA;
1268 for (uint32_t n = 16; n <= 24; n += 8) {
1269 for (size_t k = 1; k <= 40; k += 9) {
1270 for (uint32_t m = 1; m <= 1; m++) {
1271 GemmMicrokernelTester()
1272 .mr(1)
1273 .nr(8)
1274 .kr(1)
1275 .sr(1)
1276 .m(m)
1277 .n(n)
1278 .k(k)
1279 .iterations(1)
1280 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1281 }
1282 }
1283 }
1284 }
1285
1286 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel) {
1287 TEST_REQUIRES_ARM_NEON_FMA;
1288 for (size_t k = 1; k <= 40; k += 9) {
1289 GemmMicrokernelTester()
1290 .mr(1)
1291 .nr(8)
1292 .kr(1)
1293 .sr(1)
1294 .m(1)
1295 .n(8)
1296 .k(k)
1297 .ks(3)
1298 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1299 }
1300 }
1301
1302 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, small_kernel_subtile) {
1303 TEST_REQUIRES_ARM_NEON_FMA;
1304 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001305 for (uint32_t n = 1; n <= 8; n++) {
1306 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001307 GemmMicrokernelTester()
1308 .mr(1)
1309 .nr(8)
1310 .kr(1)
1311 .sr(1)
1312 .m(m)
1313 .n(n)
1314 .k(k)
1315 .ks(3)
1316 .iterations(1)
1317 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1318 }
1319 }
1320 }
1321 }
1322
1323 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_gt_8_small_kernel) {
1324 TEST_REQUIRES_ARM_NEON_FMA;
1325 for (uint32_t n = 9; n < 16; n++) {
1326 for (size_t k = 1; k <= 40; k += 9) {
1327 GemmMicrokernelTester()
1328 .mr(1)
1329 .nr(8)
1330 .kr(1)
1331 .sr(1)
1332 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001333 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001334 .k(k)
1335 .ks(3)
1336 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1337 }
1338 }
1339 }
1340
1341 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, n_div_8_small_kernel) {
1342 TEST_REQUIRES_ARM_NEON_FMA;
1343 for (uint32_t n = 16; n <= 24; n += 8) {
1344 for (size_t k = 1; k <= 40; k += 9) {
1345 GemmMicrokernelTester()
1346 .mr(1)
1347 .nr(8)
1348 .kr(1)
1349 .sr(1)
1350 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001351 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001352 .k(k)
1353 .ks(3)
1354 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1355 }
1356 }
1357 }
1358
1359 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
1360 TEST_REQUIRES_ARM_NEON_FMA;
1361 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001362 for (uint32_t n = 1; n <= 8; n++) {
1363 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001364 GemmMicrokernelTester()
1365 .mr(1)
1366 .nr(8)
1367 .kr(1)
1368 .sr(1)
1369 .m(m)
1370 .n(n)
1371 .k(k)
1372 .cm_stride(11)
1373 .iterations(1)
1374 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1375 }
1376 }
1377 }
1378 }
1379
1380 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, a_offset) {
1381 TEST_REQUIRES_ARM_NEON_FMA;
1382 for (size_t k = 1; k <= 40; k += 9) {
1383 GemmMicrokernelTester()
1384 .mr(1)
1385 .nr(8)
1386 .kr(1)
1387 .sr(1)
1388 .m(1)
1389 .n(8)
1390 .k(k)
1391 .ks(3)
1392 .a_offset(43)
1393 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1394 }
1395 }
1396
1397 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, zero) {
1398 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001399 for (size_t k = 1; k <= 40; k += 9) {
1400 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001401 GemmMicrokernelTester()
1402 .mr(1)
1403 .nr(8)
1404 .kr(1)
1405 .sr(1)
1406 .m(1)
1407 .n(8)
1408 .k(k)
1409 .ks(3)
1410 .a_offset(43)
1411 .zero_index(mz)
1412 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1413 }
1414 }
1415 }
1416
1417 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, qmin) {
1418 TEST_REQUIRES_ARM_NEON_FMA;
1419 GemmMicrokernelTester()
1420 .mr(1)
1421 .nr(8)
1422 .kr(1)
1423 .sr(1)
1424 .m(1)
1425 .n(8)
1426 .k(8)
1427 .qmin(128)
1428 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1429 }
1430
1431 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, qmax) {
1432 TEST_REQUIRES_ARM_NEON_FMA;
1433 GemmMicrokernelTester()
1434 .mr(1)
1435 .nr(8)
1436 .kr(1)
1437 .sr(1)
1438 .m(1)
1439 .n(8)
1440 .k(8)
1441 .qmax(128)
1442 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1443 }
1444
1445 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
1446 TEST_REQUIRES_ARM_NEON_FMA;
1447 GemmMicrokernelTester()
1448 .mr(1)
1449 .nr(8)
1450 .kr(1)
1451 .sr(1)
1452 .m(1)
1453 .n(8)
1454 .k(8)
1455 .cm_stride(11)
1456 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
1457 }
1458#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1459
1460
1461#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1462 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
1463 TEST_REQUIRES_ARM_NEON_FMA;
1464 GemmMicrokernelTester()
1465 .mr(1)
1466 .nr(8)
1467 .kr(1)
1468 .sr(1)
1469 .m(1)
1470 .n(8)
1471 .k(8)
1472 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1473 }
1474
1475 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
1476 TEST_REQUIRES_ARM_NEON_FMA;
1477 GemmMicrokernelTester()
1478 .mr(1)
1479 .nr(8)
1480 .kr(1)
1481 .sr(1)
1482 .m(1)
1483 .n(8)
1484 .k(8)
1485 .cn_stride(11)
1486 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1487 }
1488
1489 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
1490 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001491 for (uint32_t n = 1; n <= 8; n++) {
1492 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001493 GemmMicrokernelTester()
1494 .mr(1)
1495 .nr(8)
1496 .kr(1)
1497 .sr(1)
1498 .m(m)
1499 .n(n)
1500 .k(8)
1501 .iterations(1)
1502 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1503 }
1504 }
1505 }
1506
1507 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
1508 TEST_REQUIRES_ARM_NEON_FMA;
1509 for (uint32_t m = 1; m <= 1; m++) {
1510 GemmMicrokernelTester()
1511 .mr(1)
1512 .nr(8)
1513 .kr(1)
1514 .sr(1)
1515 .m(m)
1516 .n(8)
1517 .k(8)
1518 .iterations(1)
1519 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1520 }
1521 }
1522
1523 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
1524 TEST_REQUIRES_ARM_NEON_FMA;
1525 for (uint32_t n = 1; n <= 8; n++) {
1526 GemmMicrokernelTester()
1527 .mr(1)
1528 .nr(8)
1529 .kr(1)
1530 .sr(1)
1531 .m(1)
1532 .n(n)
1533 .k(8)
1534 .iterations(1)
1535 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1536 }
1537 }
1538
1539 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
1540 TEST_REQUIRES_ARM_NEON_FMA;
1541 GemmMicrokernelTester()
1542 .mr(1)
1543 .nr(8)
1544 .kr(1)
1545 .sr(1)
1546 .m(1)
1547 .n(8)
1548 .k(16)
1549 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1550 }
1551
1552 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
1553 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001554 for (uint32_t n = 1; n <= 8; n++) {
1555 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001556 GemmMicrokernelTester()
1557 .mr(1)
1558 .nr(8)
1559 .kr(1)
1560 .sr(1)
1561 .m(m)
1562 .n(n)
1563 .k(16)
1564 .iterations(1)
1565 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1566 }
1567 }
1568 }
1569
1570 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
1571 TEST_REQUIRES_ARM_NEON_FMA;
1572 for (size_t k = 1; k < 16; k++) {
1573 GemmMicrokernelTester()
1574 .mr(1)
1575 .nr(8)
1576 .kr(1)
1577 .sr(1)
1578 .m(1)
1579 .n(8)
1580 .k(k)
1581 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1582 }
1583 }
1584
1585 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
1586 TEST_REQUIRES_ARM_NEON_FMA;
1587 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001588 for (uint32_t n = 1; n <= 8; n++) {
1589 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001590 GemmMicrokernelTester()
1591 .mr(1)
1592 .nr(8)
1593 .kr(1)
1594 .sr(1)
1595 .m(m)
1596 .n(n)
1597 .k(k)
1598 .iterations(1)
1599 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1600 }
1601 }
1602 }
1603 }
1604
1605 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
1606 TEST_REQUIRES_ARM_NEON_FMA;
1607 for (size_t k = 17; k < 32; k++) {
1608 GemmMicrokernelTester()
1609 .mr(1)
1610 .nr(8)
1611 .kr(1)
1612 .sr(1)
1613 .m(1)
1614 .n(8)
1615 .k(k)
1616 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1617 }
1618 }
1619
1620 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
1621 TEST_REQUIRES_ARM_NEON_FMA;
1622 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001623 for (uint32_t n = 1; n <= 8; n++) {
1624 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001625 GemmMicrokernelTester()
1626 .mr(1)
1627 .nr(8)
1628 .kr(1)
1629 .sr(1)
1630 .m(m)
1631 .n(n)
1632 .k(k)
1633 .iterations(1)
1634 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1635 }
1636 }
1637 }
1638 }
1639
1640 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
1641 TEST_REQUIRES_ARM_NEON_FMA;
1642 for (size_t k = 24; k <= 80; k += 8) {
1643 GemmMicrokernelTester()
1644 .mr(1)
1645 .nr(8)
1646 .kr(1)
1647 .sr(1)
1648 .m(1)
1649 .n(8)
1650 .k(k)
1651 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1652 }
1653 }
1654
1655 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
1656 TEST_REQUIRES_ARM_NEON_FMA;
1657 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001658 for (uint32_t n = 1; n <= 8; n++) {
1659 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001660 GemmMicrokernelTester()
1661 .mr(1)
1662 .nr(8)
1663 .kr(1)
1664 .sr(1)
1665 .m(m)
1666 .n(n)
1667 .k(k)
1668 .iterations(1)
1669 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1670 }
1671 }
1672 }
1673 }
1674
1675 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
1676 TEST_REQUIRES_ARM_NEON_FMA;
1677 for (uint32_t n = 9; n < 16; n++) {
1678 for (size_t k = 1; k <= 40; k += 9) {
1679 GemmMicrokernelTester()
1680 .mr(1)
1681 .nr(8)
1682 .kr(1)
1683 .sr(1)
1684 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001685 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001686 .k(k)
1687 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1688 }
1689 }
1690 }
1691
1692 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
1693 TEST_REQUIRES_ARM_NEON_FMA;
1694 for (uint32_t n = 9; n < 16; n++) {
1695 for (size_t k = 1; k <= 40; k += 9) {
1696 GemmMicrokernelTester()
1697 .mr(1)
1698 .nr(8)
1699 .kr(1)
1700 .sr(1)
1701 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001702 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001703 .k(k)
1704 .cn_stride(11)
1705 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1706 }
1707 }
1708 }
1709
1710 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
1711 TEST_REQUIRES_ARM_NEON_FMA;
1712 for (uint32_t n = 9; n < 16; n++) {
1713 for (size_t k = 1; k <= 40; k += 9) {
1714 for (uint32_t m = 1; m <= 1; m++) {
1715 GemmMicrokernelTester()
1716 .mr(1)
1717 .nr(8)
1718 .kr(1)
1719 .sr(1)
1720 .m(m)
1721 .n(n)
1722 .k(k)
1723 .iterations(1)
1724 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1725 }
1726 }
1727 }
1728 }
1729
1730 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
1731 TEST_REQUIRES_ARM_NEON_FMA;
1732 for (uint32_t n = 16; n <= 24; n += 8) {
1733 for (size_t k = 1; k <= 40; k += 9) {
1734 GemmMicrokernelTester()
1735 .mr(1)
1736 .nr(8)
1737 .kr(1)
1738 .sr(1)
1739 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001740 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001741 .k(k)
1742 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1743 }
1744 }
1745 }
1746
1747 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
1748 TEST_REQUIRES_ARM_NEON_FMA;
1749 for (uint32_t n = 16; n <= 24; n += 8) {
1750 for (size_t k = 1; k <= 40; k += 9) {
1751 GemmMicrokernelTester()
1752 .mr(1)
1753 .nr(8)
1754 .kr(1)
1755 .sr(1)
1756 .m(1)
1757 .n(n)
1758 .k(k)
1759 .cn_stride(11)
1760 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1761 }
1762 }
1763 }
1764
1765 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
1766 TEST_REQUIRES_ARM_NEON_FMA;
1767 for (uint32_t n = 16; n <= 24; n += 8) {
1768 for (size_t k = 1; k <= 40; k += 9) {
1769 for (uint32_t m = 1; m <= 1; m++) {
1770 GemmMicrokernelTester()
1771 .mr(1)
1772 .nr(8)
1773 .kr(1)
1774 .sr(1)
1775 .m(m)
1776 .n(n)
1777 .k(k)
1778 .iterations(1)
1779 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1780 }
1781 }
1782 }
1783 }
1784
1785 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
1786 TEST_REQUIRES_ARM_NEON_FMA;
1787 for (size_t k = 1; k <= 40; k += 9) {
1788 GemmMicrokernelTester()
1789 .mr(1)
1790 .nr(8)
1791 .kr(1)
1792 .sr(1)
1793 .m(1)
1794 .n(8)
1795 .k(k)
1796 .ks(3)
1797 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1798 }
1799 }
1800
1801 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
1802 TEST_REQUIRES_ARM_NEON_FMA;
1803 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001804 for (uint32_t n = 1; n <= 8; n++) {
1805 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001806 GemmMicrokernelTester()
1807 .mr(1)
1808 .nr(8)
1809 .kr(1)
1810 .sr(1)
1811 .m(m)
1812 .n(n)
1813 .k(k)
1814 .ks(3)
1815 .iterations(1)
1816 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1817 }
1818 }
1819 }
1820 }
1821
1822 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
1823 TEST_REQUIRES_ARM_NEON_FMA;
1824 for (uint32_t n = 9; n < 16; n++) {
1825 for (size_t k = 1; k <= 40; k += 9) {
1826 GemmMicrokernelTester()
1827 .mr(1)
1828 .nr(8)
1829 .kr(1)
1830 .sr(1)
1831 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001832 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001833 .k(k)
1834 .ks(3)
1835 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1836 }
1837 }
1838 }
1839
1840 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
1841 TEST_REQUIRES_ARM_NEON_FMA;
1842 for (uint32_t n = 16; n <= 24; n += 8) {
1843 for (size_t k = 1; k <= 40; k += 9) {
1844 GemmMicrokernelTester()
1845 .mr(1)
1846 .nr(8)
1847 .kr(1)
1848 .sr(1)
1849 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001850 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001851 .k(k)
1852 .ks(3)
1853 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1854 }
1855 }
1856 }
1857
1858 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
1859 TEST_REQUIRES_ARM_NEON_FMA;
1860 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001861 for (uint32_t n = 1; n <= 8; n++) {
1862 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001863 GemmMicrokernelTester()
1864 .mr(1)
1865 .nr(8)
1866 .kr(1)
1867 .sr(1)
1868 .m(m)
1869 .n(n)
1870 .k(k)
1871 .cm_stride(11)
1872 .iterations(1)
1873 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1874 }
1875 }
1876 }
1877 }
1878
1879 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
1880 TEST_REQUIRES_ARM_NEON_FMA;
1881 for (size_t k = 1; k <= 40; k += 9) {
1882 GemmMicrokernelTester()
1883 .mr(1)
1884 .nr(8)
1885 .kr(1)
1886 .sr(1)
1887 .m(1)
1888 .n(8)
1889 .k(k)
1890 .ks(3)
1891 .a_offset(43)
1892 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1893 }
1894 }
1895
1896 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
1897 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001898 for (size_t k = 1; k <= 40; k += 9) {
1899 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001900 GemmMicrokernelTester()
1901 .mr(1)
1902 .nr(8)
1903 .kr(1)
1904 .sr(1)
1905 .m(1)
1906 .n(8)
1907 .k(k)
1908 .ks(3)
1909 .a_offset(43)
1910 .zero_index(mz)
1911 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1912 }
1913 }
1914 }
1915
1916 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
1917 TEST_REQUIRES_ARM_NEON_FMA;
1918 GemmMicrokernelTester()
1919 .mr(1)
1920 .nr(8)
1921 .kr(1)
1922 .sr(1)
1923 .m(1)
1924 .n(8)
1925 .k(8)
1926 .qmin(128)
1927 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1928 }
1929
1930 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
1931 TEST_REQUIRES_ARM_NEON_FMA;
1932 GemmMicrokernelTester()
1933 .mr(1)
1934 .nr(8)
1935 .kr(1)
1936 .sr(1)
1937 .m(1)
1938 .n(8)
1939 .k(8)
1940 .qmax(128)
1941 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1942 }
1943
1944 TEST(F32_IGEMM_MINMAX_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
1945 TEST_REQUIRES_ARM_NEON_FMA;
1946 GemmMicrokernelTester()
1947 .mr(1)
1948 .nr(8)
1949 .kr(1)
1950 .sr(1)
1951 .m(1)
1952 .n(8)
1953 .k(8)
1954 .cm_stride(11)
1955 .Test(xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
1956 }
1957#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1958
1959
1960#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1961 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4) {
1962 TEST_REQUIRES_ARM_NEON_FMA;
1963 GemmMicrokernelTester()
1964 .mr(4)
1965 .nr(8)
1966 .kr(1)
1967 .sr(1)
1968 .m(4)
1969 .n(8)
1970 .k(4)
1971 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
1972 }
1973
1974 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cn) {
1975 TEST_REQUIRES_ARM_NEON_FMA;
1976 GemmMicrokernelTester()
1977 .mr(4)
1978 .nr(8)
1979 .kr(1)
1980 .sr(1)
1981 .m(4)
1982 .n(8)
1983 .k(4)
1984 .cn_stride(11)
1985 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
1986 }
1987
1988 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile) {
1989 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001990 for (uint32_t n = 1; n <= 8; n++) {
1991 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001992 GemmMicrokernelTester()
1993 .mr(4)
1994 .nr(8)
1995 .kr(1)
1996 .sr(1)
1997 .m(m)
1998 .n(n)
1999 .k(4)
2000 .iterations(1)
2001 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2002 }
2003 }
2004 }
2005
2006 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_m) {
2007 TEST_REQUIRES_ARM_NEON_FMA;
2008 for (uint32_t m = 1; m <= 4; m++) {
2009 GemmMicrokernelTester()
2010 .mr(4)
2011 .nr(8)
2012 .kr(1)
2013 .sr(1)
2014 .m(m)
2015 .n(8)
2016 .k(4)
2017 .iterations(1)
2018 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2019 }
2020 }
2021
2022 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_n) {
2023 TEST_REQUIRES_ARM_NEON_FMA;
2024 for (uint32_t n = 1; n <= 8; n++) {
2025 GemmMicrokernelTester()
2026 .mr(4)
2027 .nr(8)
2028 .kr(1)
2029 .sr(1)
2030 .m(4)
2031 .n(n)
2032 .k(4)
2033 .iterations(1)
2034 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2035 }
2036 }
2037
2038 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8) {
2039 TEST_REQUIRES_ARM_NEON_FMA;
2040 GemmMicrokernelTester()
2041 .mr(4)
2042 .nr(8)
2043 .kr(1)
2044 .sr(1)
2045 .m(4)
2046 .n(8)
2047 .k(8)
2048 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2049 }
2050
2051 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_subtile) {
2052 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002053 for (uint32_t n = 1; n <= 8; n++) {
2054 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002055 GemmMicrokernelTester()
2056 .mr(4)
2057 .nr(8)
2058 .kr(1)
2059 .sr(1)
2060 .m(m)
2061 .n(n)
2062 .k(8)
2063 .iterations(1)
2064 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2065 }
2066 }
2067 }
2068
2069 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8) {
2070 TEST_REQUIRES_ARM_NEON_FMA;
2071 for (size_t k = 1; k < 8; k++) {
2072 GemmMicrokernelTester()
2073 .mr(4)
2074 .nr(8)
2075 .kr(1)
2076 .sr(1)
2077 .m(4)
2078 .n(8)
2079 .k(k)
2080 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2081 }
2082 }
2083
2084 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_subtile) {
2085 TEST_REQUIRES_ARM_NEON_FMA;
2086 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002087 for (uint32_t n = 1; n <= 8; n++) {
2088 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002089 GemmMicrokernelTester()
2090 .mr(4)
2091 .nr(8)
2092 .kr(1)
2093 .sr(1)
2094 .m(m)
2095 .n(n)
2096 .k(k)
2097 .iterations(1)
2098 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2099 }
2100 }
2101 }
2102 }
2103
2104 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8) {
2105 TEST_REQUIRES_ARM_NEON_FMA;
2106 for (size_t k = 9; k < 16; k++) {
2107 GemmMicrokernelTester()
2108 .mr(4)
2109 .nr(8)
2110 .kr(1)
2111 .sr(1)
2112 .m(4)
2113 .n(8)
2114 .k(k)
2115 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2116 }
2117 }
2118
2119 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_subtile) {
2120 TEST_REQUIRES_ARM_NEON_FMA;
2121 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002122 for (uint32_t n = 1; n <= 8; n++) {
2123 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002124 GemmMicrokernelTester()
2125 .mr(4)
2126 .nr(8)
2127 .kr(1)
2128 .sr(1)
2129 .m(m)
2130 .n(n)
2131 .k(k)
2132 .iterations(1)
2133 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2134 }
2135 }
2136 }
2137 }
2138
2139 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4) {
2140 TEST_REQUIRES_ARM_NEON_FMA;
2141 for (size_t k = 12; k <= 40; k += 4) {
2142 GemmMicrokernelTester()
2143 .mr(4)
2144 .nr(8)
2145 .kr(1)
2146 .sr(1)
2147 .m(4)
2148 .n(8)
2149 .k(k)
2150 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2151 }
2152 }
2153
2154 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_subtile) {
2155 TEST_REQUIRES_ARM_NEON_FMA;
2156 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002157 for (uint32_t n = 1; n <= 8; n++) {
2158 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002159 GemmMicrokernelTester()
2160 .mr(4)
2161 .nr(8)
2162 .kr(1)
2163 .sr(1)
2164 .m(m)
2165 .n(n)
2166 .k(k)
2167 .iterations(1)
2168 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2169 }
2170 }
2171 }
2172 }
2173
2174 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8) {
2175 TEST_REQUIRES_ARM_NEON_FMA;
2176 for (uint32_t n = 9; n < 16; n++) {
2177 for (size_t k = 1; k <= 20; k += 5) {
2178 GemmMicrokernelTester()
2179 .mr(4)
2180 .nr(8)
2181 .kr(1)
2182 .sr(1)
2183 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002184 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002185 .k(k)
2186 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2187 }
2188 }
2189 }
2190
2191 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_cn) {
2192 TEST_REQUIRES_ARM_NEON_FMA;
2193 for (uint32_t n = 9; n < 16; n++) {
2194 for (size_t k = 1; k <= 20; k += 5) {
2195 GemmMicrokernelTester()
2196 .mr(4)
2197 .nr(8)
2198 .kr(1)
2199 .sr(1)
2200 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002201 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002202 .k(k)
2203 .cn_stride(11)
2204 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2205 }
2206 }
2207 }
2208
2209 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_subtile) {
2210 TEST_REQUIRES_ARM_NEON_FMA;
2211 for (uint32_t n = 9; n < 16; n++) {
2212 for (size_t k = 1; k <= 20; k += 5) {
2213 for (uint32_t m = 1; m <= 4; m++) {
2214 GemmMicrokernelTester()
2215 .mr(4)
2216 .nr(8)
2217 .kr(1)
2218 .sr(1)
2219 .m(m)
2220 .n(n)
2221 .k(k)
2222 .iterations(1)
2223 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2224 }
2225 }
2226 }
2227 }
2228
2229 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8) {
2230 TEST_REQUIRES_ARM_NEON_FMA;
2231 for (uint32_t n = 16; n <= 24; n += 8) {
2232 for (size_t k = 1; k <= 20; k += 5) {
2233 GemmMicrokernelTester()
2234 .mr(4)
2235 .nr(8)
2236 .kr(1)
2237 .sr(1)
2238 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002239 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002240 .k(k)
2241 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2242 }
2243 }
2244 }
2245
2246 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_cn) {
2247 TEST_REQUIRES_ARM_NEON_FMA;
2248 for (uint32_t n = 16; n <= 24; n += 8) {
2249 for (size_t k = 1; k <= 20; k += 5) {
2250 GemmMicrokernelTester()
2251 .mr(4)
2252 .nr(8)
2253 .kr(1)
2254 .sr(1)
2255 .m(4)
2256 .n(n)
2257 .k(k)
2258 .cn_stride(11)
2259 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2260 }
2261 }
2262 }
2263
2264 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_subtile) {
2265 TEST_REQUIRES_ARM_NEON_FMA;
2266 for (uint32_t n = 16; n <= 24; n += 8) {
2267 for (size_t k = 1; k <= 20; k += 5) {
2268 for (uint32_t m = 1; m <= 4; m++) {
2269 GemmMicrokernelTester()
2270 .mr(4)
2271 .nr(8)
2272 .kr(1)
2273 .sr(1)
2274 .m(m)
2275 .n(n)
2276 .k(k)
2277 .iterations(1)
2278 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2279 }
2280 }
2281 }
2282 }
2283
2284 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, small_kernel) {
2285 TEST_REQUIRES_ARM_NEON_FMA;
2286 for (size_t k = 1; k <= 20; k += 5) {
2287 GemmMicrokernelTester()
2288 .mr(4)
2289 .nr(8)
2290 .kr(1)
2291 .sr(1)
2292 .m(4)
2293 .n(8)
2294 .k(k)
2295 .ks(3)
2296 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2297 }
2298 }
2299
2300 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, small_kernel_subtile) {
2301 TEST_REQUIRES_ARM_NEON_FMA;
2302 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002303 for (uint32_t n = 1; n <= 8; n++) {
2304 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002305 GemmMicrokernelTester()
2306 .mr(4)
2307 .nr(8)
2308 .kr(1)
2309 .sr(1)
2310 .m(m)
2311 .n(n)
2312 .k(k)
2313 .ks(3)
2314 .iterations(1)
2315 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2316 }
2317 }
2318 }
2319 }
2320
2321 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_small_kernel) {
2322 TEST_REQUIRES_ARM_NEON_FMA;
2323 for (uint32_t n = 9; n < 16; n++) {
2324 for (size_t k = 1; k <= 20; k += 5) {
2325 GemmMicrokernelTester()
2326 .mr(4)
2327 .nr(8)
2328 .kr(1)
2329 .sr(1)
2330 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002331 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002332 .k(k)
2333 .ks(3)
2334 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2335 }
2336 }
2337 }
2338
2339 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_small_kernel) {
2340 TEST_REQUIRES_ARM_NEON_FMA;
2341 for (uint32_t n = 16; n <= 24; n += 8) {
2342 for (size_t k = 1; k <= 20; k += 5) {
2343 GemmMicrokernelTester()
2344 .mr(4)
2345 .nr(8)
2346 .kr(1)
2347 .sr(1)
2348 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002349 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002350 .k(k)
2351 .ks(3)
2352 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2353 }
2354 }
2355 }
2356
2357 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm_subtile) {
2358 TEST_REQUIRES_ARM_NEON_FMA;
2359 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002360 for (uint32_t n = 1; n <= 8; n++) {
2361 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002362 GemmMicrokernelTester()
2363 .mr(4)
2364 .nr(8)
2365 .kr(1)
2366 .sr(1)
2367 .m(m)
2368 .n(n)
2369 .k(k)
2370 .cm_stride(11)
2371 .iterations(1)
2372 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2373 }
2374 }
2375 }
2376 }
2377
2378 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, a_offset) {
2379 TEST_REQUIRES_ARM_NEON_FMA;
2380 for (size_t k = 1; k <= 20; k += 5) {
2381 GemmMicrokernelTester()
2382 .mr(4)
2383 .nr(8)
2384 .kr(1)
2385 .sr(1)
2386 .m(4)
2387 .n(8)
2388 .k(k)
2389 .ks(3)
2390 .a_offset(83)
2391 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2392 }
2393 }
2394
2395 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, zero) {
2396 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002397 for (size_t k = 1; k <= 20; k += 5) {
2398 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002399 GemmMicrokernelTester()
2400 .mr(4)
2401 .nr(8)
2402 .kr(1)
2403 .sr(1)
2404 .m(4)
2405 .n(8)
2406 .k(k)
2407 .ks(3)
2408 .a_offset(83)
2409 .zero_index(mz)
2410 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2411 }
2412 }
2413 }
2414
2415 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, qmin) {
2416 TEST_REQUIRES_ARM_NEON_FMA;
2417 GemmMicrokernelTester()
2418 .mr(4)
2419 .nr(8)
2420 .kr(1)
2421 .sr(1)
2422 .m(4)
2423 .n(8)
2424 .k(4)
2425 .qmin(128)
2426 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2427 }
2428
2429 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, qmax) {
2430 TEST_REQUIRES_ARM_NEON_FMA;
2431 GemmMicrokernelTester()
2432 .mr(4)
2433 .nr(8)
2434 .kr(1)
2435 .sr(1)
2436 .m(4)
2437 .n(8)
2438 .k(4)
2439 .qmax(128)
2440 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2441 }
2442
2443 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm) {
2444 TEST_REQUIRES_ARM_NEON_FMA;
2445 GemmMicrokernelTester()
2446 .mr(4)
2447 .nr(8)
2448 .kr(1)
2449 .sr(1)
2450 .m(4)
2451 .n(8)
2452 .k(4)
2453 .cm_stride(11)
2454 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
2455 }
2456#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2457
2458
2459#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2460 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
2461 TEST_REQUIRES_ARM_NEON_FMA;
2462 GemmMicrokernelTester()
2463 .mr(4)
2464 .nr(8)
2465 .kr(1)
2466 .sr(1)
2467 .m(4)
2468 .n(8)
2469 .k(8)
2470 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2471 }
2472
2473 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
2474 TEST_REQUIRES_ARM_NEON_FMA;
2475 GemmMicrokernelTester()
2476 .mr(4)
2477 .nr(8)
2478 .kr(1)
2479 .sr(1)
2480 .m(4)
2481 .n(8)
2482 .k(8)
2483 .cn_stride(11)
2484 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2485 }
2486
2487 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
2488 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002489 for (uint32_t n = 1; n <= 8; n++) {
2490 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002491 GemmMicrokernelTester()
2492 .mr(4)
2493 .nr(8)
2494 .kr(1)
2495 .sr(1)
2496 .m(m)
2497 .n(n)
2498 .k(8)
2499 .iterations(1)
2500 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2501 }
2502 }
2503 }
2504
2505 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
2506 TEST_REQUIRES_ARM_NEON_FMA;
2507 for (uint32_t m = 1; m <= 4; m++) {
2508 GemmMicrokernelTester()
2509 .mr(4)
2510 .nr(8)
2511 .kr(1)
2512 .sr(1)
2513 .m(m)
2514 .n(8)
2515 .k(8)
2516 .iterations(1)
2517 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2518 }
2519 }
2520
2521 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
2522 TEST_REQUIRES_ARM_NEON_FMA;
2523 for (uint32_t n = 1; n <= 8; n++) {
2524 GemmMicrokernelTester()
2525 .mr(4)
2526 .nr(8)
2527 .kr(1)
2528 .sr(1)
2529 .m(4)
2530 .n(n)
2531 .k(8)
2532 .iterations(1)
2533 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2534 }
2535 }
2536
2537 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
2538 TEST_REQUIRES_ARM_NEON_FMA;
2539 GemmMicrokernelTester()
2540 .mr(4)
2541 .nr(8)
2542 .kr(1)
2543 .sr(1)
2544 .m(4)
2545 .n(8)
2546 .k(16)
2547 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2548 }
2549
2550 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
2551 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002552 for (uint32_t n = 1; n <= 8; n++) {
2553 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002554 GemmMicrokernelTester()
2555 .mr(4)
2556 .nr(8)
2557 .kr(1)
2558 .sr(1)
2559 .m(m)
2560 .n(n)
2561 .k(16)
2562 .iterations(1)
2563 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2564 }
2565 }
2566 }
2567
2568 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
2569 TEST_REQUIRES_ARM_NEON_FMA;
2570 for (size_t k = 1; k < 16; k++) {
2571 GemmMicrokernelTester()
2572 .mr(4)
2573 .nr(8)
2574 .kr(1)
2575 .sr(1)
2576 .m(4)
2577 .n(8)
2578 .k(k)
2579 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2580 }
2581 }
2582
2583 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
2584 TEST_REQUIRES_ARM_NEON_FMA;
2585 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002586 for (uint32_t n = 1; n <= 8; n++) {
2587 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002588 GemmMicrokernelTester()
2589 .mr(4)
2590 .nr(8)
2591 .kr(1)
2592 .sr(1)
2593 .m(m)
2594 .n(n)
2595 .k(k)
2596 .iterations(1)
2597 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2598 }
2599 }
2600 }
2601 }
2602
2603 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
2604 TEST_REQUIRES_ARM_NEON_FMA;
2605 for (size_t k = 17; k < 32; k++) {
2606 GemmMicrokernelTester()
2607 .mr(4)
2608 .nr(8)
2609 .kr(1)
2610 .sr(1)
2611 .m(4)
2612 .n(8)
2613 .k(k)
2614 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2615 }
2616 }
2617
2618 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
2619 TEST_REQUIRES_ARM_NEON_FMA;
2620 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002621 for (uint32_t n = 1; n <= 8; n++) {
2622 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002623 GemmMicrokernelTester()
2624 .mr(4)
2625 .nr(8)
2626 .kr(1)
2627 .sr(1)
2628 .m(m)
2629 .n(n)
2630 .k(k)
2631 .iterations(1)
2632 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2633 }
2634 }
2635 }
2636 }
2637
2638 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
2639 TEST_REQUIRES_ARM_NEON_FMA;
2640 for (size_t k = 24; k <= 80; k += 8) {
2641 GemmMicrokernelTester()
2642 .mr(4)
2643 .nr(8)
2644 .kr(1)
2645 .sr(1)
2646 .m(4)
2647 .n(8)
2648 .k(k)
2649 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2650 }
2651 }
2652
2653 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
2654 TEST_REQUIRES_ARM_NEON_FMA;
2655 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002656 for (uint32_t n = 1; n <= 8; n++) {
2657 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002658 GemmMicrokernelTester()
2659 .mr(4)
2660 .nr(8)
2661 .kr(1)
2662 .sr(1)
2663 .m(m)
2664 .n(n)
2665 .k(k)
2666 .iterations(1)
2667 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2668 }
2669 }
2670 }
2671 }
2672
2673 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
2674 TEST_REQUIRES_ARM_NEON_FMA;
2675 for (uint32_t n = 9; n < 16; n++) {
2676 for (size_t k = 1; k <= 40; k += 9) {
2677 GemmMicrokernelTester()
2678 .mr(4)
2679 .nr(8)
2680 .kr(1)
2681 .sr(1)
2682 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002683 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002684 .k(k)
2685 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2686 }
2687 }
2688 }
2689
2690 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
2691 TEST_REQUIRES_ARM_NEON_FMA;
2692 for (uint32_t n = 9; n < 16; n++) {
2693 for (size_t k = 1; k <= 40; k += 9) {
2694 GemmMicrokernelTester()
2695 .mr(4)
2696 .nr(8)
2697 .kr(1)
2698 .sr(1)
2699 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002700 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002701 .k(k)
2702 .cn_stride(11)
2703 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2704 }
2705 }
2706 }
2707
2708 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
2709 TEST_REQUIRES_ARM_NEON_FMA;
2710 for (uint32_t n = 9; n < 16; n++) {
2711 for (size_t k = 1; k <= 40; k += 9) {
2712 for (uint32_t m = 1; m <= 4; m++) {
2713 GemmMicrokernelTester()
2714 .mr(4)
2715 .nr(8)
2716 .kr(1)
2717 .sr(1)
2718 .m(m)
2719 .n(n)
2720 .k(k)
2721 .iterations(1)
2722 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2723 }
2724 }
2725 }
2726 }
2727
2728 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
2729 TEST_REQUIRES_ARM_NEON_FMA;
2730 for (uint32_t n = 16; n <= 24; n += 8) {
2731 for (size_t k = 1; k <= 40; k += 9) {
2732 GemmMicrokernelTester()
2733 .mr(4)
2734 .nr(8)
2735 .kr(1)
2736 .sr(1)
2737 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002738 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002739 .k(k)
2740 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2741 }
2742 }
2743 }
2744
2745 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
2746 TEST_REQUIRES_ARM_NEON_FMA;
2747 for (uint32_t n = 16; n <= 24; n += 8) {
2748 for (size_t k = 1; k <= 40; k += 9) {
2749 GemmMicrokernelTester()
2750 .mr(4)
2751 .nr(8)
2752 .kr(1)
2753 .sr(1)
2754 .m(4)
2755 .n(n)
2756 .k(k)
2757 .cn_stride(11)
2758 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2759 }
2760 }
2761 }
2762
2763 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
2764 TEST_REQUIRES_ARM_NEON_FMA;
2765 for (uint32_t n = 16; n <= 24; n += 8) {
2766 for (size_t k = 1; k <= 40; k += 9) {
2767 for (uint32_t m = 1; m <= 4; m++) {
2768 GemmMicrokernelTester()
2769 .mr(4)
2770 .nr(8)
2771 .kr(1)
2772 .sr(1)
2773 .m(m)
2774 .n(n)
2775 .k(k)
2776 .iterations(1)
2777 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2778 }
2779 }
2780 }
2781 }
2782
2783 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
2784 TEST_REQUIRES_ARM_NEON_FMA;
2785 for (size_t k = 1; k <= 40; k += 9) {
2786 GemmMicrokernelTester()
2787 .mr(4)
2788 .nr(8)
2789 .kr(1)
2790 .sr(1)
2791 .m(4)
2792 .n(8)
2793 .k(k)
2794 .ks(3)
2795 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2796 }
2797 }
2798
2799 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
2800 TEST_REQUIRES_ARM_NEON_FMA;
2801 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002802 for (uint32_t n = 1; n <= 8; n++) {
2803 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002804 GemmMicrokernelTester()
2805 .mr(4)
2806 .nr(8)
2807 .kr(1)
2808 .sr(1)
2809 .m(m)
2810 .n(n)
2811 .k(k)
2812 .ks(3)
2813 .iterations(1)
2814 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2815 }
2816 }
2817 }
2818 }
2819
2820 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
2821 TEST_REQUIRES_ARM_NEON_FMA;
2822 for (uint32_t n = 9; n < 16; n++) {
2823 for (size_t k = 1; k <= 40; k += 9) {
2824 GemmMicrokernelTester()
2825 .mr(4)
2826 .nr(8)
2827 .kr(1)
2828 .sr(1)
2829 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002830 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002831 .k(k)
2832 .ks(3)
2833 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2834 }
2835 }
2836 }
2837
2838 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
2839 TEST_REQUIRES_ARM_NEON_FMA;
2840 for (uint32_t n = 16; n <= 24; n += 8) {
2841 for (size_t k = 1; k <= 40; k += 9) {
2842 GemmMicrokernelTester()
2843 .mr(4)
2844 .nr(8)
2845 .kr(1)
2846 .sr(1)
2847 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002848 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002849 .k(k)
2850 .ks(3)
2851 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2852 }
2853 }
2854 }
2855
2856 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
2857 TEST_REQUIRES_ARM_NEON_FMA;
2858 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002859 for (uint32_t n = 1; n <= 8; n++) {
2860 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002861 GemmMicrokernelTester()
2862 .mr(4)
2863 .nr(8)
2864 .kr(1)
2865 .sr(1)
2866 .m(m)
2867 .n(n)
2868 .k(k)
2869 .cm_stride(11)
2870 .iterations(1)
2871 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2872 }
2873 }
2874 }
2875 }
2876
2877 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
2878 TEST_REQUIRES_ARM_NEON_FMA;
2879 for (size_t k = 1; k <= 40; k += 9) {
2880 GemmMicrokernelTester()
2881 .mr(4)
2882 .nr(8)
2883 .kr(1)
2884 .sr(1)
2885 .m(4)
2886 .n(8)
2887 .k(k)
2888 .ks(3)
2889 .a_offset(163)
2890 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2891 }
2892 }
2893
2894 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
2895 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002896 for (size_t k = 1; k <= 40; k += 9) {
2897 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002898 GemmMicrokernelTester()
2899 .mr(4)
2900 .nr(8)
2901 .kr(1)
2902 .sr(1)
2903 .m(4)
2904 .n(8)
2905 .k(k)
2906 .ks(3)
2907 .a_offset(163)
2908 .zero_index(mz)
2909 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2910 }
2911 }
2912 }
2913
2914 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
2915 TEST_REQUIRES_ARM_NEON_FMA;
2916 GemmMicrokernelTester()
2917 .mr(4)
2918 .nr(8)
2919 .kr(1)
2920 .sr(1)
2921 .m(4)
2922 .n(8)
2923 .k(8)
2924 .qmin(128)
2925 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2926 }
2927
2928 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
2929 TEST_REQUIRES_ARM_NEON_FMA;
2930 GemmMicrokernelTester()
2931 .mr(4)
2932 .nr(8)
2933 .kr(1)
2934 .sr(1)
2935 .m(4)
2936 .n(8)
2937 .k(8)
2938 .qmax(128)
2939 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2940 }
2941
2942 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
2943 TEST_REQUIRES_ARM_NEON_FMA;
2944 GemmMicrokernelTester()
2945 .mr(4)
2946 .nr(8)
2947 .kr(1)
2948 .sr(1)
2949 .m(4)
2950 .n(8)
2951 .k(8)
2952 .cm_stride(11)
2953 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
2954 }
2955#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2956
2957
2958#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2959 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
2960 TEST_REQUIRES_ARM_NEON_FMA;
2961 GemmMicrokernelTester()
2962 .mr(4)
2963 .nr(8)
2964 .kr(1)
2965 .sr(1)
2966 .m(4)
2967 .n(8)
2968 .k(8)
2969 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2970 }
2971
2972 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
2973 TEST_REQUIRES_ARM_NEON_FMA;
2974 GemmMicrokernelTester()
2975 .mr(4)
2976 .nr(8)
2977 .kr(1)
2978 .sr(1)
2979 .m(4)
2980 .n(8)
2981 .k(8)
2982 .cn_stride(11)
2983 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
2984 }
2985
2986 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
2987 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002988 for (uint32_t n = 1; n <= 8; n++) {
2989 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002990 GemmMicrokernelTester()
2991 .mr(4)
2992 .nr(8)
2993 .kr(1)
2994 .sr(1)
2995 .m(m)
2996 .n(n)
2997 .k(8)
2998 .iterations(1)
2999 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3000 }
3001 }
3002 }
3003
3004 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
3005 TEST_REQUIRES_ARM_NEON_FMA;
3006 for (uint32_t m = 1; m <= 4; m++) {
3007 GemmMicrokernelTester()
3008 .mr(4)
3009 .nr(8)
3010 .kr(1)
3011 .sr(1)
3012 .m(m)
3013 .n(8)
3014 .k(8)
3015 .iterations(1)
3016 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3017 }
3018 }
3019
3020 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
3021 TEST_REQUIRES_ARM_NEON_FMA;
3022 for (uint32_t n = 1; n <= 8; n++) {
3023 GemmMicrokernelTester()
3024 .mr(4)
3025 .nr(8)
3026 .kr(1)
3027 .sr(1)
3028 .m(4)
3029 .n(n)
3030 .k(8)
3031 .iterations(1)
3032 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3033 }
3034 }
3035
3036 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
3037 TEST_REQUIRES_ARM_NEON_FMA;
3038 GemmMicrokernelTester()
3039 .mr(4)
3040 .nr(8)
3041 .kr(1)
3042 .sr(1)
3043 .m(4)
3044 .n(8)
3045 .k(16)
3046 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3047 }
3048
3049 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
3050 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003051 for (uint32_t n = 1; n <= 8; n++) {
3052 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003053 GemmMicrokernelTester()
3054 .mr(4)
3055 .nr(8)
3056 .kr(1)
3057 .sr(1)
3058 .m(m)
3059 .n(n)
3060 .k(16)
3061 .iterations(1)
3062 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3063 }
3064 }
3065 }
3066
3067 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
3068 TEST_REQUIRES_ARM_NEON_FMA;
3069 for (size_t k = 1; k < 16; k++) {
3070 GemmMicrokernelTester()
3071 .mr(4)
3072 .nr(8)
3073 .kr(1)
3074 .sr(1)
3075 .m(4)
3076 .n(8)
3077 .k(k)
3078 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3079 }
3080 }
3081
3082 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
3083 TEST_REQUIRES_ARM_NEON_FMA;
3084 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003085 for (uint32_t n = 1; n <= 8; n++) {
3086 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003087 GemmMicrokernelTester()
3088 .mr(4)
3089 .nr(8)
3090 .kr(1)
3091 .sr(1)
3092 .m(m)
3093 .n(n)
3094 .k(k)
3095 .iterations(1)
3096 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3097 }
3098 }
3099 }
3100 }
3101
3102 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
3103 TEST_REQUIRES_ARM_NEON_FMA;
3104 for (size_t k = 17; k < 32; k++) {
3105 GemmMicrokernelTester()
3106 .mr(4)
3107 .nr(8)
3108 .kr(1)
3109 .sr(1)
3110 .m(4)
3111 .n(8)
3112 .k(k)
3113 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3114 }
3115 }
3116
3117 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
3118 TEST_REQUIRES_ARM_NEON_FMA;
3119 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003120 for (uint32_t n = 1; n <= 8; n++) {
3121 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003122 GemmMicrokernelTester()
3123 .mr(4)
3124 .nr(8)
3125 .kr(1)
3126 .sr(1)
3127 .m(m)
3128 .n(n)
3129 .k(k)
3130 .iterations(1)
3131 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3132 }
3133 }
3134 }
3135 }
3136
3137 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
3138 TEST_REQUIRES_ARM_NEON_FMA;
3139 for (size_t k = 24; k <= 80; k += 8) {
3140 GemmMicrokernelTester()
3141 .mr(4)
3142 .nr(8)
3143 .kr(1)
3144 .sr(1)
3145 .m(4)
3146 .n(8)
3147 .k(k)
3148 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3149 }
3150 }
3151
3152 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
3153 TEST_REQUIRES_ARM_NEON_FMA;
3154 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003155 for (uint32_t n = 1; n <= 8; n++) {
3156 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003157 GemmMicrokernelTester()
3158 .mr(4)
3159 .nr(8)
3160 .kr(1)
3161 .sr(1)
3162 .m(m)
3163 .n(n)
3164 .k(k)
3165 .iterations(1)
3166 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3167 }
3168 }
3169 }
3170 }
3171
3172 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
3173 TEST_REQUIRES_ARM_NEON_FMA;
3174 for (uint32_t n = 9; n < 16; n++) {
3175 for (size_t k = 1; k <= 40; k += 9) {
3176 GemmMicrokernelTester()
3177 .mr(4)
3178 .nr(8)
3179 .kr(1)
3180 .sr(1)
3181 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003182 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003183 .k(k)
3184 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3185 }
3186 }
3187 }
3188
3189 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
3190 TEST_REQUIRES_ARM_NEON_FMA;
3191 for (uint32_t n = 9; n < 16; n++) {
3192 for (size_t k = 1; k <= 40; k += 9) {
3193 GemmMicrokernelTester()
3194 .mr(4)
3195 .nr(8)
3196 .kr(1)
3197 .sr(1)
3198 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003199 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003200 .k(k)
3201 .cn_stride(11)
3202 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3203 }
3204 }
3205 }
3206
3207 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
3208 TEST_REQUIRES_ARM_NEON_FMA;
3209 for (uint32_t n = 9; n < 16; n++) {
3210 for (size_t k = 1; k <= 40; k += 9) {
3211 for (uint32_t m = 1; m <= 4; m++) {
3212 GemmMicrokernelTester()
3213 .mr(4)
3214 .nr(8)
3215 .kr(1)
3216 .sr(1)
3217 .m(m)
3218 .n(n)
3219 .k(k)
3220 .iterations(1)
3221 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3222 }
3223 }
3224 }
3225 }
3226
3227 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
3228 TEST_REQUIRES_ARM_NEON_FMA;
3229 for (uint32_t n = 16; n <= 24; n += 8) {
3230 for (size_t k = 1; k <= 40; k += 9) {
3231 GemmMicrokernelTester()
3232 .mr(4)
3233 .nr(8)
3234 .kr(1)
3235 .sr(1)
3236 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003237 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003238 .k(k)
3239 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3240 }
3241 }
3242 }
3243
3244 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
3245 TEST_REQUIRES_ARM_NEON_FMA;
3246 for (uint32_t n = 16; n <= 24; n += 8) {
3247 for (size_t k = 1; k <= 40; k += 9) {
3248 GemmMicrokernelTester()
3249 .mr(4)
3250 .nr(8)
3251 .kr(1)
3252 .sr(1)
3253 .m(4)
3254 .n(n)
3255 .k(k)
3256 .cn_stride(11)
3257 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3258 }
3259 }
3260 }
3261
3262 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
3263 TEST_REQUIRES_ARM_NEON_FMA;
3264 for (uint32_t n = 16; n <= 24; n += 8) {
3265 for (size_t k = 1; k <= 40; k += 9) {
3266 for (uint32_t m = 1; m <= 4; m++) {
3267 GemmMicrokernelTester()
3268 .mr(4)
3269 .nr(8)
3270 .kr(1)
3271 .sr(1)
3272 .m(m)
3273 .n(n)
3274 .k(k)
3275 .iterations(1)
3276 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3277 }
3278 }
3279 }
3280 }
3281
3282 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
3283 TEST_REQUIRES_ARM_NEON_FMA;
3284 for (size_t k = 1; k <= 40; k += 9) {
3285 GemmMicrokernelTester()
3286 .mr(4)
3287 .nr(8)
3288 .kr(1)
3289 .sr(1)
3290 .m(4)
3291 .n(8)
3292 .k(k)
3293 .ks(3)
3294 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3295 }
3296 }
3297
3298 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
3299 TEST_REQUIRES_ARM_NEON_FMA;
3300 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003301 for (uint32_t n = 1; n <= 8; n++) {
3302 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003303 GemmMicrokernelTester()
3304 .mr(4)
3305 .nr(8)
3306 .kr(1)
3307 .sr(1)
3308 .m(m)
3309 .n(n)
3310 .k(k)
3311 .ks(3)
3312 .iterations(1)
3313 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3314 }
3315 }
3316 }
3317 }
3318
3319 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
3320 TEST_REQUIRES_ARM_NEON_FMA;
3321 for (uint32_t n = 9; n < 16; n++) {
3322 for (size_t k = 1; k <= 40; k += 9) {
3323 GemmMicrokernelTester()
3324 .mr(4)
3325 .nr(8)
3326 .kr(1)
3327 .sr(1)
3328 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003329 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003330 .k(k)
3331 .ks(3)
3332 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3333 }
3334 }
3335 }
3336
3337 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
3338 TEST_REQUIRES_ARM_NEON_FMA;
3339 for (uint32_t n = 16; n <= 24; n += 8) {
3340 for (size_t k = 1; k <= 40; k += 9) {
3341 GemmMicrokernelTester()
3342 .mr(4)
3343 .nr(8)
3344 .kr(1)
3345 .sr(1)
3346 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003347 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003348 .k(k)
3349 .ks(3)
3350 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3351 }
3352 }
3353 }
3354
3355 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
3356 TEST_REQUIRES_ARM_NEON_FMA;
3357 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003358 for (uint32_t n = 1; n <= 8; n++) {
3359 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003360 GemmMicrokernelTester()
3361 .mr(4)
3362 .nr(8)
3363 .kr(1)
3364 .sr(1)
3365 .m(m)
3366 .n(n)
3367 .k(k)
3368 .cm_stride(11)
3369 .iterations(1)
3370 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3371 }
3372 }
3373 }
3374 }
3375
3376 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
3377 TEST_REQUIRES_ARM_NEON_FMA;
3378 for (size_t k = 1; k <= 40; k += 9) {
3379 GemmMicrokernelTester()
3380 .mr(4)
3381 .nr(8)
3382 .kr(1)
3383 .sr(1)
3384 .m(4)
3385 .n(8)
3386 .k(k)
3387 .ks(3)
3388 .a_offset(163)
3389 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3390 }
3391 }
3392
3393 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
3394 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003395 for (size_t k = 1; k <= 40; k += 9) {
3396 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003397 GemmMicrokernelTester()
3398 .mr(4)
3399 .nr(8)
3400 .kr(1)
3401 .sr(1)
3402 .m(4)
3403 .n(8)
3404 .k(k)
3405 .ks(3)
3406 .a_offset(163)
3407 .zero_index(mz)
3408 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3409 }
3410 }
3411 }
3412
3413 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
3414 TEST_REQUIRES_ARM_NEON_FMA;
3415 GemmMicrokernelTester()
3416 .mr(4)
3417 .nr(8)
3418 .kr(1)
3419 .sr(1)
3420 .m(4)
3421 .n(8)
3422 .k(8)
3423 .qmin(128)
3424 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3425 }
3426
3427 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
3428 TEST_REQUIRES_ARM_NEON_FMA;
3429 GemmMicrokernelTester()
3430 .mr(4)
3431 .nr(8)
3432 .kr(1)
3433 .sr(1)
3434 .m(4)
3435 .n(8)
3436 .k(8)
3437 .qmax(128)
3438 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3439 }
3440
3441 TEST(F32_IGEMM_MINMAX_4X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
3442 TEST_REQUIRES_ARM_NEON_FMA;
3443 GemmMicrokernelTester()
3444 .mr(4)
3445 .nr(8)
3446 .kr(1)
3447 .sr(1)
3448 .m(4)
3449 .n(8)
3450 .k(8)
3451 .cm_stride(11)
3452 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
3453 }
3454#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3455
3456
3457#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
3458 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_eq_2) {
3459 TEST_REQUIRES_ARM_NEON;
3460 GemmMicrokernelTester()
3461 .mr(4)
3462 .nr(8)
3463 .kr(1)
3464 .sr(1)
3465 .m(4)
3466 .n(8)
3467 .k(2)
3468 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3469 }
3470
3471 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, strided_cn) {
3472 TEST_REQUIRES_ARM_NEON;
3473 GemmMicrokernelTester()
3474 .mr(4)
3475 .nr(8)
3476 .kr(1)
3477 .sr(1)
3478 .m(4)
3479 .n(8)
3480 .k(2)
3481 .cn_stride(11)
3482 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3483 }
3484
3485 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_eq_2_subtile) {
3486 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003487 for (uint32_t n = 1; n <= 8; n++) {
3488 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003489 GemmMicrokernelTester()
3490 .mr(4)
3491 .nr(8)
3492 .kr(1)
3493 .sr(1)
3494 .m(m)
3495 .n(n)
3496 .k(2)
3497 .iterations(1)
3498 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3499 }
3500 }
3501 }
3502
3503 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_eq_2_subtile_m) {
3504 TEST_REQUIRES_ARM_NEON;
3505 for (uint32_t m = 1; m <= 4; m++) {
3506 GemmMicrokernelTester()
3507 .mr(4)
3508 .nr(8)
3509 .kr(1)
3510 .sr(1)
3511 .m(m)
3512 .n(8)
3513 .k(2)
3514 .iterations(1)
3515 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3516 }
3517 }
3518
3519 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_eq_2_subtile_n) {
3520 TEST_REQUIRES_ARM_NEON;
3521 for (uint32_t n = 1; n <= 8; n++) {
3522 GemmMicrokernelTester()
3523 .mr(4)
3524 .nr(8)
3525 .kr(1)
3526 .sr(1)
3527 .m(4)
3528 .n(n)
3529 .k(2)
3530 .iterations(1)
3531 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3532 }
3533 }
3534
3535 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_lt_2) {
3536 TEST_REQUIRES_ARM_NEON;
3537 for (size_t k = 1; k < 2; k++) {
3538 GemmMicrokernelTester()
3539 .mr(4)
3540 .nr(8)
3541 .kr(1)
3542 .sr(1)
3543 .m(4)
3544 .n(8)
3545 .k(k)
3546 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3547 }
3548 }
3549
3550 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_lt_2_subtile) {
3551 TEST_REQUIRES_ARM_NEON;
3552 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003553 for (uint32_t n = 1; n <= 8; n++) {
3554 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003555 GemmMicrokernelTester()
3556 .mr(4)
3557 .nr(8)
3558 .kr(1)
3559 .sr(1)
3560 .m(m)
3561 .n(n)
3562 .k(k)
3563 .iterations(1)
3564 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3565 }
3566 }
3567 }
3568 }
3569
3570 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_gt_2) {
3571 TEST_REQUIRES_ARM_NEON;
3572 for (size_t k = 3; k < 4; k++) {
3573 GemmMicrokernelTester()
3574 .mr(4)
3575 .nr(8)
3576 .kr(1)
3577 .sr(1)
3578 .m(4)
3579 .n(8)
3580 .k(k)
3581 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3582 }
3583 }
3584
3585 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_gt_2_subtile) {
3586 TEST_REQUIRES_ARM_NEON;
3587 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003588 for (uint32_t n = 1; n <= 8; n++) {
3589 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003590 GemmMicrokernelTester()
3591 .mr(4)
3592 .nr(8)
3593 .kr(1)
3594 .sr(1)
3595 .m(m)
3596 .n(n)
3597 .k(k)
3598 .iterations(1)
3599 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3600 }
3601 }
3602 }
3603 }
3604
3605 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_div_2) {
3606 TEST_REQUIRES_ARM_NEON;
3607 for (size_t k = 4; k <= 20; k += 2) {
3608 GemmMicrokernelTester()
3609 .mr(4)
3610 .nr(8)
3611 .kr(1)
3612 .sr(1)
3613 .m(4)
3614 .n(8)
3615 .k(k)
3616 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3617 }
3618 }
3619
3620 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, k_div_2_subtile) {
3621 TEST_REQUIRES_ARM_NEON;
3622 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003623 for (uint32_t n = 1; n <= 8; n++) {
3624 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003625 GemmMicrokernelTester()
3626 .mr(4)
3627 .nr(8)
3628 .kr(1)
3629 .sr(1)
3630 .m(m)
3631 .n(n)
3632 .k(k)
3633 .iterations(1)
3634 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3635 }
3636 }
3637 }
3638 }
3639
3640 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_gt_8) {
3641 TEST_REQUIRES_ARM_NEON;
3642 for (uint32_t n = 9; n < 16; n++) {
3643 for (size_t k = 1; k <= 10; k += 3) {
3644 GemmMicrokernelTester()
3645 .mr(4)
3646 .nr(8)
3647 .kr(1)
3648 .sr(1)
3649 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003650 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003651 .k(k)
3652 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3653 }
3654 }
3655 }
3656
3657 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_gt_8_strided_cn) {
3658 TEST_REQUIRES_ARM_NEON;
3659 for (uint32_t n = 9; n < 16; n++) {
3660 for (size_t k = 1; k <= 10; k += 3) {
3661 GemmMicrokernelTester()
3662 .mr(4)
3663 .nr(8)
3664 .kr(1)
3665 .sr(1)
3666 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003667 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003668 .k(k)
3669 .cn_stride(11)
3670 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3671 }
3672 }
3673 }
3674
3675 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_gt_8_subtile) {
3676 TEST_REQUIRES_ARM_NEON;
3677 for (uint32_t n = 9; n < 16; n++) {
3678 for (size_t k = 1; k <= 10; k += 3) {
3679 for (uint32_t m = 1; m <= 4; m++) {
3680 GemmMicrokernelTester()
3681 .mr(4)
3682 .nr(8)
3683 .kr(1)
3684 .sr(1)
3685 .m(m)
3686 .n(n)
3687 .k(k)
3688 .iterations(1)
3689 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3690 }
3691 }
3692 }
3693 }
3694
3695 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_div_8) {
3696 TEST_REQUIRES_ARM_NEON;
3697 for (uint32_t n = 16; n <= 24; n += 8) {
3698 for (size_t k = 1; k <= 10; k += 3) {
3699 GemmMicrokernelTester()
3700 .mr(4)
3701 .nr(8)
3702 .kr(1)
3703 .sr(1)
3704 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003705 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003706 .k(k)
3707 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3708 }
3709 }
3710 }
3711
3712 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_div_8_strided_cn) {
3713 TEST_REQUIRES_ARM_NEON;
3714 for (uint32_t n = 16; n <= 24; n += 8) {
3715 for (size_t k = 1; k <= 10; k += 3) {
3716 GemmMicrokernelTester()
3717 .mr(4)
3718 .nr(8)
3719 .kr(1)
3720 .sr(1)
3721 .m(4)
3722 .n(n)
3723 .k(k)
3724 .cn_stride(11)
3725 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3726 }
3727 }
3728 }
3729
3730 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_div_8_subtile) {
3731 TEST_REQUIRES_ARM_NEON;
3732 for (uint32_t n = 16; n <= 24; n += 8) {
3733 for (size_t k = 1; k <= 10; k += 3) {
3734 for (uint32_t m = 1; m <= 4; m++) {
3735 GemmMicrokernelTester()
3736 .mr(4)
3737 .nr(8)
3738 .kr(1)
3739 .sr(1)
3740 .m(m)
3741 .n(n)
3742 .k(k)
3743 .iterations(1)
3744 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3745 }
3746 }
3747 }
3748 }
3749
3750 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, small_kernel) {
3751 TEST_REQUIRES_ARM_NEON;
3752 for (size_t k = 1; k <= 10; k += 3) {
3753 GemmMicrokernelTester()
3754 .mr(4)
3755 .nr(8)
3756 .kr(1)
3757 .sr(1)
3758 .m(4)
3759 .n(8)
3760 .k(k)
3761 .ks(3)
3762 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3763 }
3764 }
3765
3766 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, small_kernel_subtile) {
3767 TEST_REQUIRES_ARM_NEON;
3768 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003769 for (uint32_t n = 1; n <= 8; n++) {
3770 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003771 GemmMicrokernelTester()
3772 .mr(4)
3773 .nr(8)
3774 .kr(1)
3775 .sr(1)
3776 .m(m)
3777 .n(n)
3778 .k(k)
3779 .ks(3)
3780 .iterations(1)
3781 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3782 }
3783 }
3784 }
3785 }
3786
3787 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_gt_8_small_kernel) {
3788 TEST_REQUIRES_ARM_NEON;
3789 for (uint32_t n = 9; n < 16; n++) {
3790 for (size_t k = 1; k <= 10; k += 3) {
3791 GemmMicrokernelTester()
3792 .mr(4)
3793 .nr(8)
3794 .kr(1)
3795 .sr(1)
3796 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003797 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003798 .k(k)
3799 .ks(3)
3800 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3801 }
3802 }
3803 }
3804
3805 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, n_div_8_small_kernel) {
3806 TEST_REQUIRES_ARM_NEON;
3807 for (uint32_t n = 16; n <= 24; n += 8) {
3808 for (size_t k = 1; k <= 10; k += 3) {
3809 GemmMicrokernelTester()
3810 .mr(4)
3811 .nr(8)
3812 .kr(1)
3813 .sr(1)
3814 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003815 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003816 .k(k)
3817 .ks(3)
3818 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3819 }
3820 }
3821 }
3822
3823 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, strided_cm_subtile) {
3824 TEST_REQUIRES_ARM_NEON;
3825 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003826 for (uint32_t n = 1; n <= 8; n++) {
3827 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003828 GemmMicrokernelTester()
3829 .mr(4)
3830 .nr(8)
3831 .kr(1)
3832 .sr(1)
3833 .m(m)
3834 .n(n)
3835 .k(k)
3836 .cm_stride(11)
3837 .iterations(1)
3838 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3839 }
3840 }
3841 }
3842 }
3843
3844 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, a_offset) {
3845 TEST_REQUIRES_ARM_NEON;
3846 for (size_t k = 1; k <= 10; k += 3) {
3847 GemmMicrokernelTester()
3848 .mr(4)
3849 .nr(8)
3850 .kr(1)
3851 .sr(1)
3852 .m(4)
3853 .n(8)
3854 .k(k)
3855 .ks(3)
3856 .a_offset(43)
3857 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3858 }
3859 }
3860
3861 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, zero) {
3862 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003863 for (size_t k = 1; k <= 10; k += 3) {
3864 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003865 GemmMicrokernelTester()
3866 .mr(4)
3867 .nr(8)
3868 .kr(1)
3869 .sr(1)
3870 .m(4)
3871 .n(8)
3872 .k(k)
3873 .ks(3)
3874 .a_offset(43)
3875 .zero_index(mz)
3876 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3877 }
3878 }
3879 }
3880
3881 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, qmin) {
3882 TEST_REQUIRES_ARM_NEON;
3883 GemmMicrokernelTester()
3884 .mr(4)
3885 .nr(8)
3886 .kr(1)
3887 .sr(1)
3888 .m(4)
3889 .n(8)
3890 .k(2)
3891 .qmin(128)
3892 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3893 }
3894
3895 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, qmax) {
3896 TEST_REQUIRES_ARM_NEON;
3897 GemmMicrokernelTester()
3898 .mr(4)
3899 .nr(8)
3900 .kr(1)
3901 .sr(1)
3902 .m(4)
3903 .n(8)
3904 .k(2)
3905 .qmax(128)
3906 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3907 }
3908
3909 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_LD64, strided_cm) {
3910 TEST_REQUIRES_ARM_NEON;
3911 GemmMicrokernelTester()
3912 .mr(4)
3913 .nr(8)
3914 .kr(1)
3915 .sr(1)
3916 .m(4)
3917 .n(8)
3918 .k(2)
3919 .cm_stride(11)
3920 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64, xnn_init_f32_minmax_scalar_params);
3921 }
3922#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
3923
3924
3925#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
3926 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4) {
3927 TEST_REQUIRES_ARM_NEON;
3928 GemmMicrokernelTester()
3929 .mr(4)
3930 .nr(8)
3931 .kr(1)
3932 .sr(1)
3933 .m(4)
3934 .n(8)
3935 .k(4)
3936 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
3937 }
3938
3939 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, strided_cn) {
3940 TEST_REQUIRES_ARM_NEON;
3941 GemmMicrokernelTester()
3942 .mr(4)
3943 .nr(8)
3944 .kr(1)
3945 .sr(1)
3946 .m(4)
3947 .n(8)
3948 .k(4)
3949 .cn_stride(11)
3950 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
3951 }
3952
3953 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile) {
3954 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003955 for (uint32_t n = 1; n <= 8; n++) {
3956 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003957 GemmMicrokernelTester()
3958 .mr(4)
3959 .nr(8)
3960 .kr(1)
3961 .sr(1)
3962 .m(m)
3963 .n(n)
3964 .k(4)
3965 .iterations(1)
3966 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
3967 }
3968 }
3969 }
3970
3971 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile_m) {
3972 TEST_REQUIRES_ARM_NEON;
3973 for (uint32_t m = 1; m <= 4; m++) {
3974 GemmMicrokernelTester()
3975 .mr(4)
3976 .nr(8)
3977 .kr(1)
3978 .sr(1)
3979 .m(m)
3980 .n(8)
3981 .k(4)
3982 .iterations(1)
3983 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
3984 }
3985 }
3986
3987 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile_n) {
3988 TEST_REQUIRES_ARM_NEON;
3989 for (uint32_t n = 1; n <= 8; n++) {
3990 GemmMicrokernelTester()
3991 .mr(4)
3992 .nr(8)
3993 .kr(1)
3994 .sr(1)
3995 .m(4)
3996 .n(n)
3997 .k(4)
3998 .iterations(1)
3999 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4000 }
4001 }
4002
4003 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_8) {
4004 TEST_REQUIRES_ARM_NEON;
4005 GemmMicrokernelTester()
4006 .mr(4)
4007 .nr(8)
4008 .kr(1)
4009 .sr(1)
4010 .m(4)
4011 .n(8)
4012 .k(8)
4013 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4014 }
4015
4016 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_eq_8_subtile) {
4017 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004018 for (uint32_t n = 1; n <= 8; n++) {
4019 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004020 GemmMicrokernelTester()
4021 .mr(4)
4022 .nr(8)
4023 .kr(1)
4024 .sr(1)
4025 .m(m)
4026 .n(n)
4027 .k(8)
4028 .iterations(1)
4029 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4030 }
4031 }
4032 }
4033
4034 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_lt_8) {
4035 TEST_REQUIRES_ARM_NEON;
4036 for (size_t k = 1; k < 8; k++) {
4037 GemmMicrokernelTester()
4038 .mr(4)
4039 .nr(8)
4040 .kr(1)
4041 .sr(1)
4042 .m(4)
4043 .n(8)
4044 .k(k)
4045 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4046 }
4047 }
4048
4049 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_lt_8_subtile) {
4050 TEST_REQUIRES_ARM_NEON;
4051 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004052 for (uint32_t n = 1; n <= 8; n++) {
4053 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004054 GemmMicrokernelTester()
4055 .mr(4)
4056 .nr(8)
4057 .kr(1)
4058 .sr(1)
4059 .m(m)
4060 .n(n)
4061 .k(k)
4062 .iterations(1)
4063 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4064 }
4065 }
4066 }
4067 }
4068
4069 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_gt_8) {
4070 TEST_REQUIRES_ARM_NEON;
4071 for (size_t k = 9; k < 16; k++) {
4072 GemmMicrokernelTester()
4073 .mr(4)
4074 .nr(8)
4075 .kr(1)
4076 .sr(1)
4077 .m(4)
4078 .n(8)
4079 .k(k)
4080 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4081 }
4082 }
4083
4084 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_gt_8_subtile) {
4085 TEST_REQUIRES_ARM_NEON;
4086 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004087 for (uint32_t n = 1; n <= 8; n++) {
4088 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004089 GemmMicrokernelTester()
4090 .mr(4)
4091 .nr(8)
4092 .kr(1)
4093 .sr(1)
4094 .m(m)
4095 .n(n)
4096 .k(k)
4097 .iterations(1)
4098 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4099 }
4100 }
4101 }
4102 }
4103
4104 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_div_4) {
4105 TEST_REQUIRES_ARM_NEON;
4106 for (size_t k = 12; k <= 40; k += 4) {
4107 GemmMicrokernelTester()
4108 .mr(4)
4109 .nr(8)
4110 .kr(1)
4111 .sr(1)
4112 .m(4)
4113 .n(8)
4114 .k(k)
4115 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4116 }
4117 }
4118
4119 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, k_div_4_subtile) {
4120 TEST_REQUIRES_ARM_NEON;
4121 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004122 for (uint32_t n = 1; n <= 8; n++) {
4123 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004124 GemmMicrokernelTester()
4125 .mr(4)
4126 .nr(8)
4127 .kr(1)
4128 .sr(1)
4129 .m(m)
4130 .n(n)
4131 .k(k)
4132 .iterations(1)
4133 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4134 }
4135 }
4136 }
4137 }
4138
4139 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8) {
4140 TEST_REQUIRES_ARM_NEON;
4141 for (uint32_t n = 9; n < 16; n++) {
4142 for (size_t k = 1; k <= 20; k += 5) {
4143 GemmMicrokernelTester()
4144 .mr(4)
4145 .nr(8)
4146 .kr(1)
4147 .sr(1)
4148 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004149 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004150 .k(k)
4151 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4152 }
4153 }
4154 }
4155
4156 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_strided_cn) {
4157 TEST_REQUIRES_ARM_NEON;
4158 for (uint32_t n = 9; n < 16; n++) {
4159 for (size_t k = 1; k <= 20; k += 5) {
4160 GemmMicrokernelTester()
4161 .mr(4)
4162 .nr(8)
4163 .kr(1)
4164 .sr(1)
4165 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004166 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004167 .k(k)
4168 .cn_stride(11)
4169 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4170 }
4171 }
4172 }
4173
4174 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_subtile) {
4175 TEST_REQUIRES_ARM_NEON;
4176 for (uint32_t n = 9; n < 16; n++) {
4177 for (size_t k = 1; k <= 20; k += 5) {
4178 for (uint32_t m = 1; m <= 4; m++) {
4179 GemmMicrokernelTester()
4180 .mr(4)
4181 .nr(8)
4182 .kr(1)
4183 .sr(1)
4184 .m(m)
4185 .n(n)
4186 .k(k)
4187 .iterations(1)
4188 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4189 }
4190 }
4191 }
4192 }
4193
4194 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_div_8) {
4195 TEST_REQUIRES_ARM_NEON;
4196 for (uint32_t n = 16; n <= 24; n += 8) {
4197 for (size_t k = 1; k <= 20; k += 5) {
4198 GemmMicrokernelTester()
4199 .mr(4)
4200 .nr(8)
4201 .kr(1)
4202 .sr(1)
4203 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004204 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004205 .k(k)
4206 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4207 }
4208 }
4209 }
4210
4211 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_strided_cn) {
4212 TEST_REQUIRES_ARM_NEON;
4213 for (uint32_t n = 16; n <= 24; n += 8) {
4214 for (size_t k = 1; k <= 20; k += 5) {
4215 GemmMicrokernelTester()
4216 .mr(4)
4217 .nr(8)
4218 .kr(1)
4219 .sr(1)
4220 .m(4)
4221 .n(n)
4222 .k(k)
4223 .cn_stride(11)
4224 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4225 }
4226 }
4227 }
4228
4229 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_subtile) {
4230 TEST_REQUIRES_ARM_NEON;
4231 for (uint32_t n = 16; n <= 24; n += 8) {
4232 for (size_t k = 1; k <= 20; k += 5) {
4233 for (uint32_t m = 1; m <= 4; m++) {
4234 GemmMicrokernelTester()
4235 .mr(4)
4236 .nr(8)
4237 .kr(1)
4238 .sr(1)
4239 .m(m)
4240 .n(n)
4241 .k(k)
4242 .iterations(1)
4243 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4244 }
4245 }
4246 }
4247 }
4248
4249 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, small_kernel) {
4250 TEST_REQUIRES_ARM_NEON;
4251 for (size_t k = 1; k <= 20; k += 5) {
4252 GemmMicrokernelTester()
4253 .mr(4)
4254 .nr(8)
4255 .kr(1)
4256 .sr(1)
4257 .m(4)
4258 .n(8)
4259 .k(k)
4260 .ks(3)
4261 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4262 }
4263 }
4264
4265 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, small_kernel_subtile) {
4266 TEST_REQUIRES_ARM_NEON;
4267 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004268 for (uint32_t n = 1; n <= 8; n++) {
4269 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004270 GemmMicrokernelTester()
4271 .mr(4)
4272 .nr(8)
4273 .kr(1)
4274 .sr(1)
4275 .m(m)
4276 .n(n)
4277 .k(k)
4278 .ks(3)
4279 .iterations(1)
4280 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4281 }
4282 }
4283 }
4284 }
4285
4286 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_small_kernel) {
4287 TEST_REQUIRES_ARM_NEON;
4288 for (uint32_t n = 9; n < 16; n++) {
4289 for (size_t k = 1; k <= 20; k += 5) {
4290 GemmMicrokernelTester()
4291 .mr(4)
4292 .nr(8)
4293 .kr(1)
4294 .sr(1)
4295 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004296 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004297 .k(k)
4298 .ks(3)
4299 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4300 }
4301 }
4302 }
4303
4304 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_small_kernel) {
4305 TEST_REQUIRES_ARM_NEON;
4306 for (uint32_t n = 16; n <= 24; n += 8) {
4307 for (size_t k = 1; k <= 20; k += 5) {
4308 GemmMicrokernelTester()
4309 .mr(4)
4310 .nr(8)
4311 .kr(1)
4312 .sr(1)
4313 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004314 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004315 .k(k)
4316 .ks(3)
4317 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4318 }
4319 }
4320 }
4321
4322 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, strided_cm_subtile) {
4323 TEST_REQUIRES_ARM_NEON;
4324 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004325 for (uint32_t n = 1; n <= 8; n++) {
4326 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004327 GemmMicrokernelTester()
4328 .mr(4)
4329 .nr(8)
4330 .kr(1)
4331 .sr(1)
4332 .m(m)
4333 .n(n)
4334 .k(k)
4335 .cm_stride(11)
4336 .iterations(1)
4337 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4338 }
4339 }
4340 }
4341 }
4342
4343 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, a_offset) {
4344 TEST_REQUIRES_ARM_NEON;
4345 for (size_t k = 1; k <= 20; k += 5) {
4346 GemmMicrokernelTester()
4347 .mr(4)
4348 .nr(8)
4349 .kr(1)
4350 .sr(1)
4351 .m(4)
4352 .n(8)
4353 .k(k)
4354 .ks(3)
4355 .a_offset(83)
4356 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4357 }
4358 }
4359
4360 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, zero) {
4361 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004362 for (size_t k = 1; k <= 20; k += 5) {
4363 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004364 GemmMicrokernelTester()
4365 .mr(4)
4366 .nr(8)
4367 .kr(1)
4368 .sr(1)
4369 .m(4)
4370 .n(8)
4371 .k(k)
4372 .ks(3)
4373 .a_offset(83)
4374 .zero_index(mz)
4375 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4376 }
4377 }
4378 }
4379
4380 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, qmin) {
4381 TEST_REQUIRES_ARM_NEON;
4382 GemmMicrokernelTester()
4383 .mr(4)
4384 .nr(8)
4385 .kr(1)
4386 .sr(1)
4387 .m(4)
4388 .n(8)
4389 .k(4)
4390 .qmin(128)
4391 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4392 }
4393
4394 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, qmax) {
4395 TEST_REQUIRES_ARM_NEON;
4396 GemmMicrokernelTester()
4397 .mr(4)
4398 .nr(8)
4399 .kr(1)
4400 .sr(1)
4401 .m(4)
4402 .n(8)
4403 .k(4)
4404 .qmax(128)
4405 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4406 }
4407
4408 TEST(F32_IGEMM_MINMAX_4X8__AARCH32_NEON_CORTEX_A53, strided_cm) {
4409 TEST_REQUIRES_ARM_NEON;
4410 GemmMicrokernelTester()
4411 .mr(4)
4412 .nr(8)
4413 .kr(1)
4414 .sr(1)
4415 .m(4)
4416 .n(8)
4417 .k(4)
4418 .cm_stride(11)
4419 .Test(xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
4420 }
4421#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
4422
4423
4424#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4425 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4) {
4426 TEST_REQUIRES_ARM_NEON_FMA;
4427 GemmMicrokernelTester()
4428 .mr(6)
4429 .nr(8)
4430 .kr(1)
4431 .sr(1)
4432 .m(6)
4433 .n(8)
4434 .k(4)
4435 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4436 }
4437
4438 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cn) {
4439 TEST_REQUIRES_ARM_NEON_FMA;
4440 GemmMicrokernelTester()
4441 .mr(6)
4442 .nr(8)
4443 .kr(1)
4444 .sr(1)
4445 .m(6)
4446 .n(8)
4447 .k(4)
4448 .cn_stride(11)
4449 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4450 }
4451
4452 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile) {
4453 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004454 for (uint32_t n = 1; n <= 8; n++) {
4455 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004456 GemmMicrokernelTester()
4457 .mr(6)
4458 .nr(8)
4459 .kr(1)
4460 .sr(1)
4461 .m(m)
4462 .n(n)
4463 .k(4)
4464 .iterations(1)
4465 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4466 }
4467 }
4468 }
4469
4470 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_m) {
4471 TEST_REQUIRES_ARM_NEON_FMA;
4472 for (uint32_t m = 1; m <= 6; m++) {
4473 GemmMicrokernelTester()
4474 .mr(6)
4475 .nr(8)
4476 .kr(1)
4477 .sr(1)
4478 .m(m)
4479 .n(8)
4480 .k(4)
4481 .iterations(1)
4482 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4483 }
4484 }
4485
4486 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_4_subtile_n) {
4487 TEST_REQUIRES_ARM_NEON_FMA;
4488 for (uint32_t n = 1; n <= 8; n++) {
4489 GemmMicrokernelTester()
4490 .mr(6)
4491 .nr(8)
4492 .kr(1)
4493 .sr(1)
4494 .m(6)
4495 .n(n)
4496 .k(4)
4497 .iterations(1)
4498 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4499 }
4500 }
4501
4502 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8) {
4503 TEST_REQUIRES_ARM_NEON_FMA;
4504 GemmMicrokernelTester()
4505 .mr(6)
4506 .nr(8)
4507 .kr(1)
4508 .sr(1)
4509 .m(6)
4510 .n(8)
4511 .k(8)
4512 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4513 }
4514
4515 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_eq_8_subtile) {
4516 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004517 for (uint32_t n = 1; n <= 8; n++) {
4518 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004519 GemmMicrokernelTester()
4520 .mr(6)
4521 .nr(8)
4522 .kr(1)
4523 .sr(1)
4524 .m(m)
4525 .n(n)
4526 .k(8)
4527 .iterations(1)
4528 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4529 }
4530 }
4531 }
4532
4533 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8) {
4534 TEST_REQUIRES_ARM_NEON_FMA;
4535 for (size_t k = 1; k < 8; k++) {
4536 GemmMicrokernelTester()
4537 .mr(6)
4538 .nr(8)
4539 .kr(1)
4540 .sr(1)
4541 .m(6)
4542 .n(8)
4543 .k(k)
4544 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4545 }
4546 }
4547
4548 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_lt_8_subtile) {
4549 TEST_REQUIRES_ARM_NEON_FMA;
4550 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004551 for (uint32_t n = 1; n <= 8; n++) {
4552 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004553 GemmMicrokernelTester()
4554 .mr(6)
4555 .nr(8)
4556 .kr(1)
4557 .sr(1)
4558 .m(m)
4559 .n(n)
4560 .k(k)
4561 .iterations(1)
4562 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4563 }
4564 }
4565 }
4566 }
4567
4568 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8) {
4569 TEST_REQUIRES_ARM_NEON_FMA;
4570 for (size_t k = 9; k < 16; k++) {
4571 GemmMicrokernelTester()
4572 .mr(6)
4573 .nr(8)
4574 .kr(1)
4575 .sr(1)
4576 .m(6)
4577 .n(8)
4578 .k(k)
4579 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4580 }
4581 }
4582
4583 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_gt_8_subtile) {
4584 TEST_REQUIRES_ARM_NEON_FMA;
4585 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004586 for (uint32_t n = 1; n <= 8; n++) {
4587 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004588 GemmMicrokernelTester()
4589 .mr(6)
4590 .nr(8)
4591 .kr(1)
4592 .sr(1)
4593 .m(m)
4594 .n(n)
4595 .k(k)
4596 .iterations(1)
4597 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4598 }
4599 }
4600 }
4601 }
4602
4603 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4) {
4604 TEST_REQUIRES_ARM_NEON_FMA;
4605 for (size_t k = 12; k <= 40; k += 4) {
4606 GemmMicrokernelTester()
4607 .mr(6)
4608 .nr(8)
4609 .kr(1)
4610 .sr(1)
4611 .m(6)
4612 .n(8)
4613 .k(k)
4614 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4615 }
4616 }
4617
4618 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, k_div_4_subtile) {
4619 TEST_REQUIRES_ARM_NEON_FMA;
4620 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004621 for (uint32_t n = 1; n <= 8; n++) {
4622 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004623 GemmMicrokernelTester()
4624 .mr(6)
4625 .nr(8)
4626 .kr(1)
4627 .sr(1)
4628 .m(m)
4629 .n(n)
4630 .k(k)
4631 .iterations(1)
4632 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4633 }
4634 }
4635 }
4636 }
4637
4638 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8) {
4639 TEST_REQUIRES_ARM_NEON_FMA;
4640 for (uint32_t n = 9; n < 16; n++) {
4641 for (size_t k = 1; k <= 20; k += 5) {
4642 GemmMicrokernelTester()
4643 .mr(6)
4644 .nr(8)
4645 .kr(1)
4646 .sr(1)
4647 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004648 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004649 .k(k)
4650 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4651 }
4652 }
4653 }
4654
4655 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_strided_cn) {
4656 TEST_REQUIRES_ARM_NEON_FMA;
4657 for (uint32_t n = 9; n < 16; n++) {
4658 for (size_t k = 1; k <= 20; k += 5) {
4659 GemmMicrokernelTester()
4660 .mr(6)
4661 .nr(8)
4662 .kr(1)
4663 .sr(1)
4664 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004665 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004666 .k(k)
4667 .cn_stride(11)
4668 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4669 }
4670 }
4671 }
4672
4673 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_subtile) {
4674 TEST_REQUIRES_ARM_NEON_FMA;
4675 for (uint32_t n = 9; n < 16; n++) {
4676 for (size_t k = 1; k <= 20; k += 5) {
4677 for (uint32_t m = 1; m <= 6; m++) {
4678 GemmMicrokernelTester()
4679 .mr(6)
4680 .nr(8)
4681 .kr(1)
4682 .sr(1)
4683 .m(m)
4684 .n(n)
4685 .k(k)
4686 .iterations(1)
4687 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4688 }
4689 }
4690 }
4691 }
4692
4693 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8) {
4694 TEST_REQUIRES_ARM_NEON_FMA;
4695 for (uint32_t n = 16; n <= 24; n += 8) {
4696 for (size_t k = 1; k <= 20; k += 5) {
4697 GemmMicrokernelTester()
4698 .mr(6)
4699 .nr(8)
4700 .kr(1)
4701 .sr(1)
4702 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004703 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004704 .k(k)
4705 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4706 }
4707 }
4708 }
4709
4710 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_strided_cn) {
4711 TEST_REQUIRES_ARM_NEON_FMA;
4712 for (uint32_t n = 16; n <= 24; n += 8) {
4713 for (size_t k = 1; k <= 20; k += 5) {
4714 GemmMicrokernelTester()
4715 .mr(6)
4716 .nr(8)
4717 .kr(1)
4718 .sr(1)
4719 .m(6)
4720 .n(n)
4721 .k(k)
4722 .cn_stride(11)
4723 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4724 }
4725 }
4726 }
4727
4728 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_subtile) {
4729 TEST_REQUIRES_ARM_NEON_FMA;
4730 for (uint32_t n = 16; n <= 24; n += 8) {
4731 for (size_t k = 1; k <= 20; k += 5) {
4732 for (uint32_t m = 1; m <= 6; m++) {
4733 GemmMicrokernelTester()
4734 .mr(6)
4735 .nr(8)
4736 .kr(1)
4737 .sr(1)
4738 .m(m)
4739 .n(n)
4740 .k(k)
4741 .iterations(1)
4742 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4743 }
4744 }
4745 }
4746 }
4747
4748 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, small_kernel) {
4749 TEST_REQUIRES_ARM_NEON_FMA;
4750 for (size_t k = 1; k <= 20; k += 5) {
4751 GemmMicrokernelTester()
4752 .mr(6)
4753 .nr(8)
4754 .kr(1)
4755 .sr(1)
4756 .m(6)
4757 .n(8)
4758 .k(k)
4759 .ks(3)
4760 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4761 }
4762 }
4763
4764 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, small_kernel_subtile) {
4765 TEST_REQUIRES_ARM_NEON_FMA;
4766 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004767 for (uint32_t n = 1; n <= 8; n++) {
4768 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004769 GemmMicrokernelTester()
4770 .mr(6)
4771 .nr(8)
4772 .kr(1)
4773 .sr(1)
4774 .m(m)
4775 .n(n)
4776 .k(k)
4777 .ks(3)
4778 .iterations(1)
4779 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4780 }
4781 }
4782 }
4783 }
4784
4785 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_gt_8_small_kernel) {
4786 TEST_REQUIRES_ARM_NEON_FMA;
4787 for (uint32_t n = 9; n < 16; n++) {
4788 for (size_t k = 1; k <= 20; k += 5) {
4789 GemmMicrokernelTester()
4790 .mr(6)
4791 .nr(8)
4792 .kr(1)
4793 .sr(1)
4794 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004795 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004796 .k(k)
4797 .ks(3)
4798 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4799 }
4800 }
4801 }
4802
4803 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, n_div_8_small_kernel) {
4804 TEST_REQUIRES_ARM_NEON_FMA;
4805 for (uint32_t n = 16; n <= 24; n += 8) {
4806 for (size_t k = 1; k <= 20; k += 5) {
4807 GemmMicrokernelTester()
4808 .mr(6)
4809 .nr(8)
4810 .kr(1)
4811 .sr(1)
4812 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004813 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004814 .k(k)
4815 .ks(3)
4816 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4817 }
4818 }
4819 }
4820
4821 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm_subtile) {
4822 TEST_REQUIRES_ARM_NEON_FMA;
4823 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004824 for (uint32_t n = 1; n <= 8; n++) {
4825 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004826 GemmMicrokernelTester()
4827 .mr(6)
4828 .nr(8)
4829 .kr(1)
4830 .sr(1)
4831 .m(m)
4832 .n(n)
4833 .k(k)
4834 .cm_stride(11)
4835 .iterations(1)
4836 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4837 }
4838 }
4839 }
4840 }
4841
4842 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, a_offset) {
4843 TEST_REQUIRES_ARM_NEON_FMA;
4844 for (size_t k = 1; k <= 20; k += 5) {
4845 GemmMicrokernelTester()
4846 .mr(6)
4847 .nr(8)
4848 .kr(1)
4849 .sr(1)
4850 .m(6)
4851 .n(8)
4852 .k(k)
4853 .ks(3)
4854 .a_offset(127)
4855 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4856 }
4857 }
4858
4859 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, zero) {
4860 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004861 for (size_t k = 1; k <= 20; k += 5) {
4862 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004863 GemmMicrokernelTester()
4864 .mr(6)
4865 .nr(8)
4866 .kr(1)
4867 .sr(1)
4868 .m(6)
4869 .n(8)
4870 .k(k)
4871 .ks(3)
4872 .a_offset(127)
4873 .zero_index(mz)
4874 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4875 }
4876 }
4877 }
4878
4879 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, qmin) {
4880 TEST_REQUIRES_ARM_NEON_FMA;
4881 GemmMicrokernelTester()
4882 .mr(6)
4883 .nr(8)
4884 .kr(1)
4885 .sr(1)
4886 .m(6)
4887 .n(8)
4888 .k(4)
4889 .qmin(128)
4890 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4891 }
4892
4893 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, qmax) {
4894 TEST_REQUIRES_ARM_NEON_FMA;
4895 GemmMicrokernelTester()
4896 .mr(6)
4897 .nr(8)
4898 .kr(1)
4899 .sr(1)
4900 .m(6)
4901 .n(8)
4902 .k(4)
4903 .qmax(128)
4904 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4905 }
4906
4907 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A55, strided_cm) {
4908 TEST_REQUIRES_ARM_NEON_FMA;
4909 GemmMicrokernelTester()
4910 .mr(6)
4911 .nr(8)
4912 .kr(1)
4913 .sr(1)
4914 .m(6)
4915 .n(8)
4916 .k(4)
4917 .cm_stride(11)
4918 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55, xnn_init_f32_minmax_scalar_params);
4919 }
4920#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4921
4922
4923#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4924 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
4925 TEST_REQUIRES_ARM_NEON_FMA;
4926 GemmMicrokernelTester()
4927 .mr(6)
4928 .nr(8)
4929 .kr(1)
4930 .sr(1)
4931 .m(6)
4932 .n(8)
4933 .k(8)
4934 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
4935 }
4936
4937 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
4938 TEST_REQUIRES_ARM_NEON_FMA;
4939 GemmMicrokernelTester()
4940 .mr(6)
4941 .nr(8)
4942 .kr(1)
4943 .sr(1)
4944 .m(6)
4945 .n(8)
4946 .k(8)
4947 .cn_stride(11)
4948 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
4949 }
4950
4951 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
4952 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004953 for (uint32_t n = 1; n <= 8; n++) {
4954 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004955 GemmMicrokernelTester()
4956 .mr(6)
4957 .nr(8)
4958 .kr(1)
4959 .sr(1)
4960 .m(m)
4961 .n(n)
4962 .k(8)
4963 .iterations(1)
4964 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
4965 }
4966 }
4967 }
4968
4969 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
4970 TEST_REQUIRES_ARM_NEON_FMA;
4971 for (uint32_t m = 1; m <= 6; m++) {
4972 GemmMicrokernelTester()
4973 .mr(6)
4974 .nr(8)
4975 .kr(1)
4976 .sr(1)
4977 .m(m)
4978 .n(8)
4979 .k(8)
4980 .iterations(1)
4981 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
4982 }
4983 }
4984
4985 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
4986 TEST_REQUIRES_ARM_NEON_FMA;
4987 for (uint32_t n = 1; n <= 8; n++) {
4988 GemmMicrokernelTester()
4989 .mr(6)
4990 .nr(8)
4991 .kr(1)
4992 .sr(1)
4993 .m(6)
4994 .n(n)
4995 .k(8)
4996 .iterations(1)
4997 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
4998 }
4999 }
5000
5001 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
5002 TEST_REQUIRES_ARM_NEON_FMA;
5003 GemmMicrokernelTester()
5004 .mr(6)
5005 .nr(8)
5006 .kr(1)
5007 .sr(1)
5008 .m(6)
5009 .n(8)
5010 .k(16)
5011 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5012 }
5013
5014 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
5015 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005016 for (uint32_t n = 1; n <= 8; n++) {
5017 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005018 GemmMicrokernelTester()
5019 .mr(6)
5020 .nr(8)
5021 .kr(1)
5022 .sr(1)
5023 .m(m)
5024 .n(n)
5025 .k(16)
5026 .iterations(1)
5027 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5028 }
5029 }
5030 }
5031
5032 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
5033 TEST_REQUIRES_ARM_NEON_FMA;
5034 for (size_t k = 1; k < 16; k++) {
5035 GemmMicrokernelTester()
5036 .mr(6)
5037 .nr(8)
5038 .kr(1)
5039 .sr(1)
5040 .m(6)
5041 .n(8)
5042 .k(k)
5043 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5044 }
5045 }
5046
5047 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
5048 TEST_REQUIRES_ARM_NEON_FMA;
5049 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005050 for (uint32_t n = 1; n <= 8; n++) {
5051 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005052 GemmMicrokernelTester()
5053 .mr(6)
5054 .nr(8)
5055 .kr(1)
5056 .sr(1)
5057 .m(m)
5058 .n(n)
5059 .k(k)
5060 .iterations(1)
5061 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5062 }
5063 }
5064 }
5065 }
5066
5067 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
5068 TEST_REQUIRES_ARM_NEON_FMA;
5069 for (size_t k = 17; k < 32; k++) {
5070 GemmMicrokernelTester()
5071 .mr(6)
5072 .nr(8)
5073 .kr(1)
5074 .sr(1)
5075 .m(6)
5076 .n(8)
5077 .k(k)
5078 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5079 }
5080 }
5081
5082 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
5083 TEST_REQUIRES_ARM_NEON_FMA;
5084 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005085 for (uint32_t n = 1; n <= 8; n++) {
5086 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005087 GemmMicrokernelTester()
5088 .mr(6)
5089 .nr(8)
5090 .kr(1)
5091 .sr(1)
5092 .m(m)
5093 .n(n)
5094 .k(k)
5095 .iterations(1)
5096 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5097 }
5098 }
5099 }
5100 }
5101
5102 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
5103 TEST_REQUIRES_ARM_NEON_FMA;
5104 for (size_t k = 24; k <= 80; k += 8) {
5105 GemmMicrokernelTester()
5106 .mr(6)
5107 .nr(8)
5108 .kr(1)
5109 .sr(1)
5110 .m(6)
5111 .n(8)
5112 .k(k)
5113 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5114 }
5115 }
5116
5117 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
5118 TEST_REQUIRES_ARM_NEON_FMA;
5119 for (size_t k = 24; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005120 for (uint32_t n = 1; n <= 8; n++) {
5121 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005122 GemmMicrokernelTester()
5123 .mr(6)
5124 .nr(8)
5125 .kr(1)
5126 .sr(1)
5127 .m(m)
5128 .n(n)
5129 .k(k)
5130 .iterations(1)
5131 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5132 }
5133 }
5134 }
5135 }
5136
5137 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
5138 TEST_REQUIRES_ARM_NEON_FMA;
5139 for (uint32_t n = 9; n < 16; n++) {
5140 for (size_t k = 1; k <= 40; k += 9) {
5141 GemmMicrokernelTester()
5142 .mr(6)
5143 .nr(8)
5144 .kr(1)
5145 .sr(1)
5146 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005147 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005148 .k(k)
5149 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5150 }
5151 }
5152 }
5153
5154 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
5155 TEST_REQUIRES_ARM_NEON_FMA;
5156 for (uint32_t n = 9; n < 16; n++) {
5157 for (size_t k = 1; k <= 40; k += 9) {
5158 GemmMicrokernelTester()
5159 .mr(6)
5160 .nr(8)
5161 .kr(1)
5162 .sr(1)
5163 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005164 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005165 .k(k)
5166 .cn_stride(11)
5167 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5168 }
5169 }
5170 }
5171
5172 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
5173 TEST_REQUIRES_ARM_NEON_FMA;
5174 for (uint32_t n = 9; n < 16; n++) {
5175 for (size_t k = 1; k <= 40; k += 9) {
5176 for (uint32_t m = 1; m <= 6; m++) {
5177 GemmMicrokernelTester()
5178 .mr(6)
5179 .nr(8)
5180 .kr(1)
5181 .sr(1)
5182 .m(m)
5183 .n(n)
5184 .k(k)
5185 .iterations(1)
5186 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5187 }
5188 }
5189 }
5190 }
5191
5192 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
5193 TEST_REQUIRES_ARM_NEON_FMA;
5194 for (uint32_t n = 16; n <= 24; n += 8) {
5195 for (size_t k = 1; k <= 40; k += 9) {
5196 GemmMicrokernelTester()
5197 .mr(6)
5198 .nr(8)
5199 .kr(1)
5200 .sr(1)
5201 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005202 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005203 .k(k)
5204 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5205 }
5206 }
5207 }
5208
5209 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
5210 TEST_REQUIRES_ARM_NEON_FMA;
5211 for (uint32_t n = 16; n <= 24; n += 8) {
5212 for (size_t k = 1; k <= 40; k += 9) {
5213 GemmMicrokernelTester()
5214 .mr(6)
5215 .nr(8)
5216 .kr(1)
5217 .sr(1)
5218 .m(6)
5219 .n(n)
5220 .k(k)
5221 .cn_stride(11)
5222 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5223 }
5224 }
5225 }
5226
5227 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
5228 TEST_REQUIRES_ARM_NEON_FMA;
5229 for (uint32_t n = 16; n <= 24; n += 8) {
5230 for (size_t k = 1; k <= 40; k += 9) {
5231 for (uint32_t m = 1; m <= 6; m++) {
5232 GemmMicrokernelTester()
5233 .mr(6)
5234 .nr(8)
5235 .kr(1)
5236 .sr(1)
5237 .m(m)
5238 .n(n)
5239 .k(k)
5240 .iterations(1)
5241 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5242 }
5243 }
5244 }
5245 }
5246
5247 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
5248 TEST_REQUIRES_ARM_NEON_FMA;
5249 for (size_t k = 1; k <= 40; k += 9) {
5250 GemmMicrokernelTester()
5251 .mr(6)
5252 .nr(8)
5253 .kr(1)
5254 .sr(1)
5255 .m(6)
5256 .n(8)
5257 .k(k)
5258 .ks(3)
5259 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5260 }
5261 }
5262
5263 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
5264 TEST_REQUIRES_ARM_NEON_FMA;
5265 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005266 for (uint32_t n = 1; n <= 8; n++) {
5267 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005268 GemmMicrokernelTester()
5269 .mr(6)
5270 .nr(8)
5271 .kr(1)
5272 .sr(1)
5273 .m(m)
5274 .n(n)
5275 .k(k)
5276 .ks(3)
5277 .iterations(1)
5278 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5279 }
5280 }
5281 }
5282 }
5283
5284 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
5285 TEST_REQUIRES_ARM_NEON_FMA;
5286 for (uint32_t n = 9; n < 16; n++) {
5287 for (size_t k = 1; k <= 40; k += 9) {
5288 GemmMicrokernelTester()
5289 .mr(6)
5290 .nr(8)
5291 .kr(1)
5292 .sr(1)
5293 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005294 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005295 .k(k)
5296 .ks(3)
5297 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5298 }
5299 }
5300 }
5301
5302 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
5303 TEST_REQUIRES_ARM_NEON_FMA;
5304 for (uint32_t n = 16; n <= 24; n += 8) {
5305 for (size_t k = 1; k <= 40; k += 9) {
5306 GemmMicrokernelTester()
5307 .mr(6)
5308 .nr(8)
5309 .kr(1)
5310 .sr(1)
5311 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005312 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005313 .k(k)
5314 .ks(3)
5315 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5316 }
5317 }
5318 }
5319
5320 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
5321 TEST_REQUIRES_ARM_NEON_FMA;
5322 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005323 for (uint32_t n = 1; n <= 8; n++) {
5324 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005325 GemmMicrokernelTester()
5326 .mr(6)
5327 .nr(8)
5328 .kr(1)
5329 .sr(1)
5330 .m(m)
5331 .n(n)
5332 .k(k)
5333 .cm_stride(11)
5334 .iterations(1)
5335 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5336 }
5337 }
5338 }
5339 }
5340
5341 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
5342 TEST_REQUIRES_ARM_NEON_FMA;
5343 for (size_t k = 1; k <= 40; k += 9) {
5344 GemmMicrokernelTester()
5345 .mr(6)
5346 .nr(8)
5347 .kr(1)
5348 .sr(1)
5349 .m(6)
5350 .n(8)
5351 .k(k)
5352 .ks(3)
5353 .a_offset(251)
5354 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5355 }
5356 }
5357
5358 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
5359 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005360 for (size_t k = 1; k <= 40; k += 9) {
5361 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005362 GemmMicrokernelTester()
5363 .mr(6)
5364 .nr(8)
5365 .kr(1)
5366 .sr(1)
5367 .m(6)
5368 .n(8)
5369 .k(k)
5370 .ks(3)
5371 .a_offset(251)
5372 .zero_index(mz)
5373 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5374 }
5375 }
5376 }
5377
5378 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
5379 TEST_REQUIRES_ARM_NEON_FMA;
5380 GemmMicrokernelTester()
5381 .mr(6)
5382 .nr(8)
5383 .kr(1)
5384 .sr(1)
5385 .m(6)
5386 .n(8)
5387 .k(8)
5388 .qmin(128)
5389 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5390 }
5391
5392 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
5393 TEST_REQUIRES_ARM_NEON_FMA;
5394 GemmMicrokernelTester()
5395 .mr(6)
5396 .nr(8)
5397 .kr(1)
5398 .sr(1)
5399 .m(6)
5400 .n(8)
5401 .k(8)
5402 .qmax(128)
5403 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5404 }
5405
5406 TEST(F32_IGEMM_MINMAX_6X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
5407 TEST_REQUIRES_ARM_NEON_FMA;
5408 GemmMicrokernelTester()
5409 .mr(6)
5410 .nr(8)
5411 .kr(1)
5412 .sr(1)
5413 .m(6)
5414 .n(8)
5415 .k(8)
5416 .cm_stride(11)
5417 .Test(xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
5418 }
5419#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5420
5421
5422#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5423 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
5424 TEST_REQUIRES_ARM_NEON_FMA;
5425 GemmMicrokernelTester()
5426 .mr(1)
5427 .nr(12)
5428 .kr(1)
5429 .sr(1)
5430 .m(1)
5431 .n(12)
5432 .k(4)
5433 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5434 }
5435
5436 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
5437 TEST_REQUIRES_ARM_NEON_FMA;
5438 GemmMicrokernelTester()
5439 .mr(1)
5440 .nr(12)
5441 .kr(1)
5442 .sr(1)
5443 .m(1)
5444 .n(12)
5445 .k(4)
5446 .cn_stride(17)
5447 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5448 }
5449
5450 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
5451 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005452 for (uint32_t n = 1; n <= 12; n++) {
5453 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005454 GemmMicrokernelTester()
5455 .mr(1)
5456 .nr(12)
5457 .kr(1)
5458 .sr(1)
5459 .m(m)
5460 .n(n)
5461 .k(4)
5462 .iterations(1)
5463 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5464 }
5465 }
5466 }
5467
5468 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
5469 TEST_REQUIRES_ARM_NEON_FMA;
5470 for (uint32_t m = 1; m <= 1; m++) {
5471 GemmMicrokernelTester()
5472 .mr(1)
5473 .nr(12)
5474 .kr(1)
5475 .sr(1)
5476 .m(m)
5477 .n(12)
5478 .k(4)
5479 .iterations(1)
5480 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5481 }
5482 }
5483
5484 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
5485 TEST_REQUIRES_ARM_NEON_FMA;
5486 for (uint32_t n = 1; n <= 12; n++) {
5487 GemmMicrokernelTester()
5488 .mr(1)
5489 .nr(12)
5490 .kr(1)
5491 .sr(1)
5492 .m(1)
5493 .n(n)
5494 .k(4)
5495 .iterations(1)
5496 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5497 }
5498 }
5499
5500 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
5501 TEST_REQUIRES_ARM_NEON_FMA;
5502 GemmMicrokernelTester()
5503 .mr(1)
5504 .nr(12)
5505 .kr(1)
5506 .sr(1)
5507 .m(1)
5508 .n(12)
5509 .k(8)
5510 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5511 }
5512
5513 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
5514 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005515 for (uint32_t n = 1; n <= 12; n++) {
5516 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005517 GemmMicrokernelTester()
5518 .mr(1)
5519 .nr(12)
5520 .kr(1)
5521 .sr(1)
5522 .m(m)
5523 .n(n)
5524 .k(8)
5525 .iterations(1)
5526 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5527 }
5528 }
5529 }
5530
5531 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
5532 TEST_REQUIRES_ARM_NEON_FMA;
5533 for (size_t k = 1; k < 8; k++) {
5534 GemmMicrokernelTester()
5535 .mr(1)
5536 .nr(12)
5537 .kr(1)
5538 .sr(1)
5539 .m(1)
5540 .n(12)
5541 .k(k)
5542 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5543 }
5544 }
5545
5546 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
5547 TEST_REQUIRES_ARM_NEON_FMA;
5548 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005549 for (uint32_t n = 1; n <= 12; n++) {
5550 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005551 GemmMicrokernelTester()
5552 .mr(1)
5553 .nr(12)
5554 .kr(1)
5555 .sr(1)
5556 .m(m)
5557 .n(n)
5558 .k(k)
5559 .iterations(1)
5560 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5561 }
5562 }
5563 }
5564 }
5565
5566 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
5567 TEST_REQUIRES_ARM_NEON_FMA;
5568 for (size_t k = 9; k < 16; k++) {
5569 GemmMicrokernelTester()
5570 .mr(1)
5571 .nr(12)
5572 .kr(1)
5573 .sr(1)
5574 .m(1)
5575 .n(12)
5576 .k(k)
5577 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5578 }
5579 }
5580
5581 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
5582 TEST_REQUIRES_ARM_NEON_FMA;
5583 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005584 for (uint32_t n = 1; n <= 12; n++) {
5585 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005586 GemmMicrokernelTester()
5587 .mr(1)
5588 .nr(12)
5589 .kr(1)
5590 .sr(1)
5591 .m(m)
5592 .n(n)
5593 .k(k)
5594 .iterations(1)
5595 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5596 }
5597 }
5598 }
5599 }
5600
5601 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
5602 TEST_REQUIRES_ARM_NEON_FMA;
5603 for (size_t k = 12; k <= 40; k += 4) {
5604 GemmMicrokernelTester()
5605 .mr(1)
5606 .nr(12)
5607 .kr(1)
5608 .sr(1)
5609 .m(1)
5610 .n(12)
5611 .k(k)
5612 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5613 }
5614 }
5615
5616 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
5617 TEST_REQUIRES_ARM_NEON_FMA;
5618 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005619 for (uint32_t n = 1; n <= 12; n++) {
5620 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005621 GemmMicrokernelTester()
5622 .mr(1)
5623 .nr(12)
5624 .kr(1)
5625 .sr(1)
5626 .m(m)
5627 .n(n)
5628 .k(k)
5629 .iterations(1)
5630 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5631 }
5632 }
5633 }
5634 }
5635
5636 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12) {
5637 TEST_REQUIRES_ARM_NEON_FMA;
5638 for (uint32_t n = 13; n < 24; n++) {
5639 for (size_t k = 1; k <= 20; k += 5) {
5640 GemmMicrokernelTester()
5641 .mr(1)
5642 .nr(12)
5643 .kr(1)
5644 .sr(1)
5645 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005646 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005647 .k(k)
5648 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5649 }
5650 }
5651 }
5652
5653 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_cn) {
5654 TEST_REQUIRES_ARM_NEON_FMA;
5655 for (uint32_t n = 13; n < 24; n++) {
5656 for (size_t k = 1; k <= 20; k += 5) {
5657 GemmMicrokernelTester()
5658 .mr(1)
5659 .nr(12)
5660 .kr(1)
5661 .sr(1)
5662 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005663 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005664 .k(k)
5665 .cn_stride(17)
5666 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5667 }
5668 }
5669 }
5670
5671 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_subtile) {
5672 TEST_REQUIRES_ARM_NEON_FMA;
5673 for (uint32_t n = 13; n < 24; n++) {
5674 for (size_t k = 1; k <= 20; k += 5) {
5675 for (uint32_t m = 1; m <= 1; m++) {
5676 GemmMicrokernelTester()
5677 .mr(1)
5678 .nr(12)
5679 .kr(1)
5680 .sr(1)
5681 .m(m)
5682 .n(n)
5683 .k(k)
5684 .iterations(1)
5685 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5686 }
5687 }
5688 }
5689 }
5690
5691 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12) {
5692 TEST_REQUIRES_ARM_NEON_FMA;
5693 for (uint32_t n = 24; n <= 36; n += 12) {
5694 for (size_t k = 1; k <= 20; k += 5) {
5695 GemmMicrokernelTester()
5696 .mr(1)
5697 .nr(12)
5698 .kr(1)
5699 .sr(1)
5700 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005701 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005702 .k(k)
5703 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5704 }
5705 }
5706 }
5707
5708 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_cn) {
5709 TEST_REQUIRES_ARM_NEON_FMA;
5710 for (uint32_t n = 24; n <= 36; n += 12) {
5711 for (size_t k = 1; k <= 20; k += 5) {
5712 GemmMicrokernelTester()
5713 .mr(1)
5714 .nr(12)
5715 .kr(1)
5716 .sr(1)
5717 .m(1)
5718 .n(n)
5719 .k(k)
5720 .cn_stride(17)
5721 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5722 }
5723 }
5724 }
5725
5726 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_subtile) {
5727 TEST_REQUIRES_ARM_NEON_FMA;
5728 for (uint32_t n = 24; n <= 36; n += 12) {
5729 for (size_t k = 1; k <= 20; k += 5) {
5730 for (uint32_t m = 1; m <= 1; m++) {
5731 GemmMicrokernelTester()
5732 .mr(1)
5733 .nr(12)
5734 .kr(1)
5735 .sr(1)
5736 .m(m)
5737 .n(n)
5738 .k(k)
5739 .iterations(1)
5740 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5741 }
5742 }
5743 }
5744 }
5745
5746 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, small_kernel) {
5747 TEST_REQUIRES_ARM_NEON_FMA;
5748 for (size_t k = 1; k <= 20; k += 5) {
5749 GemmMicrokernelTester()
5750 .mr(1)
5751 .nr(12)
5752 .kr(1)
5753 .sr(1)
5754 .m(1)
5755 .n(12)
5756 .k(k)
5757 .ks(3)
5758 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5759 }
5760 }
5761
5762 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, small_kernel_subtile) {
5763 TEST_REQUIRES_ARM_NEON_FMA;
5764 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005765 for (uint32_t n = 1; n <= 12; n++) {
5766 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005767 GemmMicrokernelTester()
5768 .mr(1)
5769 .nr(12)
5770 .kr(1)
5771 .sr(1)
5772 .m(m)
5773 .n(n)
5774 .k(k)
5775 .ks(3)
5776 .iterations(1)
5777 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5778 }
5779 }
5780 }
5781 }
5782
5783 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_small_kernel) {
5784 TEST_REQUIRES_ARM_NEON_FMA;
5785 for (uint32_t n = 13; n < 24; n++) {
5786 for (size_t k = 1; k <= 20; k += 5) {
5787 GemmMicrokernelTester()
5788 .mr(1)
5789 .nr(12)
5790 .kr(1)
5791 .sr(1)
5792 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005793 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005794 .k(k)
5795 .ks(3)
5796 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5797 }
5798 }
5799 }
5800
5801 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_small_kernel) {
5802 TEST_REQUIRES_ARM_NEON_FMA;
5803 for (uint32_t n = 24; n <= 36; n += 12) {
5804 for (size_t k = 1; k <= 20; k += 5) {
5805 GemmMicrokernelTester()
5806 .mr(1)
5807 .nr(12)
5808 .kr(1)
5809 .sr(1)
5810 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005811 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005812 .k(k)
5813 .ks(3)
5814 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5815 }
5816 }
5817 }
5818
5819 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
5820 TEST_REQUIRES_ARM_NEON_FMA;
5821 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005822 for (uint32_t n = 1; n <= 12; n++) {
5823 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005824 GemmMicrokernelTester()
5825 .mr(1)
5826 .nr(12)
5827 .kr(1)
5828 .sr(1)
5829 .m(m)
5830 .n(n)
5831 .k(k)
5832 .cm_stride(17)
5833 .iterations(1)
5834 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5835 }
5836 }
5837 }
5838 }
5839
5840 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, a_offset) {
5841 TEST_REQUIRES_ARM_NEON_FMA;
5842 for (size_t k = 1; k <= 20; k += 5) {
5843 GemmMicrokernelTester()
5844 .mr(1)
5845 .nr(12)
5846 .kr(1)
5847 .sr(1)
5848 .m(1)
5849 .n(12)
5850 .k(k)
5851 .ks(3)
5852 .a_offset(23)
5853 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5854 }
5855 }
5856
5857 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, zero) {
5858 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005859 for (size_t k = 1; k <= 20; k += 5) {
5860 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005861 GemmMicrokernelTester()
5862 .mr(1)
5863 .nr(12)
5864 .kr(1)
5865 .sr(1)
5866 .m(1)
5867 .n(12)
5868 .k(k)
5869 .ks(3)
5870 .a_offset(23)
5871 .zero_index(mz)
5872 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5873 }
5874 }
5875 }
5876
5877 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, qmin) {
5878 TEST_REQUIRES_ARM_NEON_FMA;
5879 GemmMicrokernelTester()
5880 .mr(1)
5881 .nr(12)
5882 .kr(1)
5883 .sr(1)
5884 .m(1)
5885 .n(12)
5886 .k(4)
5887 .qmin(128)
5888 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5889 }
5890
5891 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, qmax) {
5892 TEST_REQUIRES_ARM_NEON_FMA;
5893 GemmMicrokernelTester()
5894 .mr(1)
5895 .nr(12)
5896 .kr(1)
5897 .sr(1)
5898 .m(1)
5899 .n(12)
5900 .k(4)
5901 .qmax(128)
5902 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5903 }
5904
5905 TEST(F32_IGEMM_MINMAX_1X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
5906 TEST_REQUIRES_ARM_NEON_FMA;
5907 GemmMicrokernelTester()
5908 .mr(1)
5909 .nr(12)
5910 .kr(1)
5911 .sr(1)
5912 .m(1)
5913 .n(12)
5914 .k(4)
5915 .cm_stride(17)
5916 .Test(xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5917 }
5918#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5919
5920
5921#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5922 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4) {
5923 TEST_REQUIRES_ARM_NEON_FMA;
5924 GemmMicrokernelTester()
5925 .mr(4)
5926 .nr(12)
5927 .kr(1)
5928 .sr(1)
5929 .m(4)
5930 .n(12)
5931 .k(4)
5932 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5933 }
5934
5935 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cn) {
5936 TEST_REQUIRES_ARM_NEON_FMA;
5937 GemmMicrokernelTester()
5938 .mr(4)
5939 .nr(12)
5940 .kr(1)
5941 .sr(1)
5942 .m(4)
5943 .n(12)
5944 .k(4)
5945 .cn_stride(17)
5946 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5947 }
5948
5949 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile) {
5950 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005951 for (uint32_t n = 1; n <= 12; n++) {
5952 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005953 GemmMicrokernelTester()
5954 .mr(4)
5955 .nr(12)
5956 .kr(1)
5957 .sr(1)
5958 .m(m)
5959 .n(n)
5960 .k(4)
5961 .iterations(1)
5962 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5963 }
5964 }
5965 }
5966
5967 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_m) {
5968 TEST_REQUIRES_ARM_NEON_FMA;
5969 for (uint32_t m = 1; m <= 4; m++) {
5970 GemmMicrokernelTester()
5971 .mr(4)
5972 .nr(12)
5973 .kr(1)
5974 .sr(1)
5975 .m(m)
5976 .n(12)
5977 .k(4)
5978 .iterations(1)
5979 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5980 }
5981 }
5982
5983 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_4_subtile_n) {
5984 TEST_REQUIRES_ARM_NEON_FMA;
5985 for (uint32_t n = 1; n <= 12; n++) {
5986 GemmMicrokernelTester()
5987 .mr(4)
5988 .nr(12)
5989 .kr(1)
5990 .sr(1)
5991 .m(4)
5992 .n(n)
5993 .k(4)
5994 .iterations(1)
5995 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
5996 }
5997 }
5998
5999 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8) {
6000 TEST_REQUIRES_ARM_NEON_FMA;
6001 GemmMicrokernelTester()
6002 .mr(4)
6003 .nr(12)
6004 .kr(1)
6005 .sr(1)
6006 .m(4)
6007 .n(12)
6008 .k(8)
6009 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6010 }
6011
6012 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_eq_8_subtile) {
6013 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006014 for (uint32_t n = 1; n <= 12; n++) {
6015 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006016 GemmMicrokernelTester()
6017 .mr(4)
6018 .nr(12)
6019 .kr(1)
6020 .sr(1)
6021 .m(m)
6022 .n(n)
6023 .k(8)
6024 .iterations(1)
6025 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6026 }
6027 }
6028 }
6029
6030 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8) {
6031 TEST_REQUIRES_ARM_NEON_FMA;
6032 for (size_t k = 1; k < 8; k++) {
6033 GemmMicrokernelTester()
6034 .mr(4)
6035 .nr(12)
6036 .kr(1)
6037 .sr(1)
6038 .m(4)
6039 .n(12)
6040 .k(k)
6041 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6042 }
6043 }
6044
6045 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_lt_8_subtile) {
6046 TEST_REQUIRES_ARM_NEON_FMA;
6047 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006048 for (uint32_t n = 1; n <= 12; n++) {
6049 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006050 GemmMicrokernelTester()
6051 .mr(4)
6052 .nr(12)
6053 .kr(1)
6054 .sr(1)
6055 .m(m)
6056 .n(n)
6057 .k(k)
6058 .iterations(1)
6059 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6060 }
6061 }
6062 }
6063 }
6064
6065 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8) {
6066 TEST_REQUIRES_ARM_NEON_FMA;
6067 for (size_t k = 9; k < 16; k++) {
6068 GemmMicrokernelTester()
6069 .mr(4)
6070 .nr(12)
6071 .kr(1)
6072 .sr(1)
6073 .m(4)
6074 .n(12)
6075 .k(k)
6076 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6077 }
6078 }
6079
6080 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_gt_8_subtile) {
6081 TEST_REQUIRES_ARM_NEON_FMA;
6082 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006083 for (uint32_t n = 1; n <= 12; n++) {
6084 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006085 GemmMicrokernelTester()
6086 .mr(4)
6087 .nr(12)
6088 .kr(1)
6089 .sr(1)
6090 .m(m)
6091 .n(n)
6092 .k(k)
6093 .iterations(1)
6094 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6095 }
6096 }
6097 }
6098 }
6099
6100 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4) {
6101 TEST_REQUIRES_ARM_NEON_FMA;
6102 for (size_t k = 12; k <= 40; k += 4) {
6103 GemmMicrokernelTester()
6104 .mr(4)
6105 .nr(12)
6106 .kr(1)
6107 .sr(1)
6108 .m(4)
6109 .n(12)
6110 .k(k)
6111 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6112 }
6113 }
6114
6115 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, k_div_4_subtile) {
6116 TEST_REQUIRES_ARM_NEON_FMA;
6117 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006118 for (uint32_t n = 1; n <= 12; n++) {
6119 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006120 GemmMicrokernelTester()
6121 .mr(4)
6122 .nr(12)
6123 .kr(1)
6124 .sr(1)
6125 .m(m)
6126 .n(n)
6127 .k(k)
6128 .iterations(1)
6129 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6130 }
6131 }
6132 }
6133 }
6134
6135 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12) {
6136 TEST_REQUIRES_ARM_NEON_FMA;
6137 for (uint32_t n = 13; n < 24; n++) {
6138 for (size_t k = 1; k <= 20; k += 5) {
6139 GemmMicrokernelTester()
6140 .mr(4)
6141 .nr(12)
6142 .kr(1)
6143 .sr(1)
6144 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006145 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006146 .k(k)
6147 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6148 }
6149 }
6150 }
6151
6152 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_strided_cn) {
6153 TEST_REQUIRES_ARM_NEON_FMA;
6154 for (uint32_t n = 13; n < 24; n++) {
6155 for (size_t k = 1; k <= 20; k += 5) {
6156 GemmMicrokernelTester()
6157 .mr(4)
6158 .nr(12)
6159 .kr(1)
6160 .sr(1)
6161 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006162 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006163 .k(k)
6164 .cn_stride(17)
6165 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6166 }
6167 }
6168 }
6169
6170 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_subtile) {
6171 TEST_REQUIRES_ARM_NEON_FMA;
6172 for (uint32_t n = 13; n < 24; n++) {
6173 for (size_t k = 1; k <= 20; k += 5) {
6174 for (uint32_t m = 1; m <= 4; m++) {
6175 GemmMicrokernelTester()
6176 .mr(4)
6177 .nr(12)
6178 .kr(1)
6179 .sr(1)
6180 .m(m)
6181 .n(n)
6182 .k(k)
6183 .iterations(1)
6184 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6185 }
6186 }
6187 }
6188 }
6189
6190 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12) {
6191 TEST_REQUIRES_ARM_NEON_FMA;
6192 for (uint32_t n = 24; n <= 36; n += 12) {
6193 for (size_t k = 1; k <= 20; k += 5) {
6194 GemmMicrokernelTester()
6195 .mr(4)
6196 .nr(12)
6197 .kr(1)
6198 .sr(1)
6199 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006200 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006201 .k(k)
6202 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6203 }
6204 }
6205 }
6206
6207 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_strided_cn) {
6208 TEST_REQUIRES_ARM_NEON_FMA;
6209 for (uint32_t n = 24; n <= 36; n += 12) {
6210 for (size_t k = 1; k <= 20; k += 5) {
6211 GemmMicrokernelTester()
6212 .mr(4)
6213 .nr(12)
6214 .kr(1)
6215 .sr(1)
6216 .m(4)
6217 .n(n)
6218 .k(k)
6219 .cn_stride(17)
6220 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6221 }
6222 }
6223 }
6224
6225 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_subtile) {
6226 TEST_REQUIRES_ARM_NEON_FMA;
6227 for (uint32_t n = 24; n <= 36; n += 12) {
6228 for (size_t k = 1; k <= 20; k += 5) {
6229 for (uint32_t m = 1; m <= 4; m++) {
6230 GemmMicrokernelTester()
6231 .mr(4)
6232 .nr(12)
6233 .kr(1)
6234 .sr(1)
6235 .m(m)
6236 .n(n)
6237 .k(k)
6238 .iterations(1)
6239 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6240 }
6241 }
6242 }
6243 }
6244
6245 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, small_kernel) {
6246 TEST_REQUIRES_ARM_NEON_FMA;
6247 for (size_t k = 1; k <= 20; k += 5) {
6248 GemmMicrokernelTester()
6249 .mr(4)
6250 .nr(12)
6251 .kr(1)
6252 .sr(1)
6253 .m(4)
6254 .n(12)
6255 .k(k)
6256 .ks(3)
6257 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6258 }
6259 }
6260
6261 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, small_kernel_subtile) {
6262 TEST_REQUIRES_ARM_NEON_FMA;
6263 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006264 for (uint32_t n = 1; n <= 12; n++) {
6265 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006266 GemmMicrokernelTester()
6267 .mr(4)
6268 .nr(12)
6269 .kr(1)
6270 .sr(1)
6271 .m(m)
6272 .n(n)
6273 .k(k)
6274 .ks(3)
6275 .iterations(1)
6276 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6277 }
6278 }
6279 }
6280 }
6281
6282 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_gt_12_small_kernel) {
6283 TEST_REQUIRES_ARM_NEON_FMA;
6284 for (uint32_t n = 13; n < 24; n++) {
6285 for (size_t k = 1; k <= 20; k += 5) {
6286 GemmMicrokernelTester()
6287 .mr(4)
6288 .nr(12)
6289 .kr(1)
6290 .sr(1)
6291 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006292 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006293 .k(k)
6294 .ks(3)
6295 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6296 }
6297 }
6298 }
6299
6300 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, n_div_12_small_kernel) {
6301 TEST_REQUIRES_ARM_NEON_FMA;
6302 for (uint32_t n = 24; n <= 36; n += 12) {
6303 for (size_t k = 1; k <= 20; k += 5) {
6304 GemmMicrokernelTester()
6305 .mr(4)
6306 .nr(12)
6307 .kr(1)
6308 .sr(1)
6309 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006310 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006311 .k(k)
6312 .ks(3)
6313 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6314 }
6315 }
6316 }
6317
6318 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm_subtile) {
6319 TEST_REQUIRES_ARM_NEON_FMA;
6320 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006321 for (uint32_t n = 1; n <= 12; n++) {
6322 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006323 GemmMicrokernelTester()
6324 .mr(4)
6325 .nr(12)
6326 .kr(1)
6327 .sr(1)
6328 .m(m)
6329 .n(n)
6330 .k(k)
6331 .cm_stride(17)
6332 .iterations(1)
6333 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6334 }
6335 }
6336 }
6337 }
6338
6339 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, a_offset) {
6340 TEST_REQUIRES_ARM_NEON_FMA;
6341 for (size_t k = 1; k <= 20; k += 5) {
6342 GemmMicrokernelTester()
6343 .mr(4)
6344 .nr(12)
6345 .kr(1)
6346 .sr(1)
6347 .m(4)
6348 .n(12)
6349 .k(k)
6350 .ks(3)
6351 .a_offset(83)
6352 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6353 }
6354 }
6355
6356 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, zero) {
6357 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006358 for (size_t k = 1; k <= 20; k += 5) {
6359 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006360 GemmMicrokernelTester()
6361 .mr(4)
6362 .nr(12)
6363 .kr(1)
6364 .sr(1)
6365 .m(4)
6366 .n(12)
6367 .k(k)
6368 .ks(3)
6369 .a_offset(83)
6370 .zero_index(mz)
6371 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6372 }
6373 }
6374 }
6375
6376 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, qmin) {
6377 TEST_REQUIRES_ARM_NEON_FMA;
6378 GemmMicrokernelTester()
6379 .mr(4)
6380 .nr(12)
6381 .kr(1)
6382 .sr(1)
6383 .m(4)
6384 .n(12)
6385 .k(4)
6386 .qmin(128)
6387 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6388 }
6389
6390 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, qmax) {
6391 TEST_REQUIRES_ARM_NEON_FMA;
6392 GemmMicrokernelTester()
6393 .mr(4)
6394 .nr(12)
6395 .kr(1)
6396 .sr(1)
6397 .m(4)
6398 .n(12)
6399 .k(4)
6400 .qmax(128)
6401 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6402 }
6403
6404 TEST(F32_IGEMM_MINMAX_4X12__AARCH64_NEONFMA_CORTEX_A53, strided_cm) {
6405 TEST_REQUIRES_ARM_NEON_FMA;
6406 GemmMicrokernelTester()
6407 .mr(4)
6408 .nr(12)
6409 .kr(1)
6410 .sr(1)
6411 .m(4)
6412 .n(12)
6413 .k(4)
6414 .cm_stride(17)
6415 .Test(xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53, xnn_init_f32_minmax_scalar_params);
6416 }
6417#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
6418
6419
6420#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6421 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2) {
6422 TEST_REQUIRES_ARM_NEON;
6423 GemmMicrokernelTester()
6424 .mr(1)
6425 .nr(8)
6426 .kr(1)
6427 .sr(1)
6428 .m(1)
6429 .n(8)
6430 .k(2)
6431 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6432 }
6433
6434 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, strided_cn) {
6435 TEST_REQUIRES_ARM_NEON;
6436 GemmMicrokernelTester()
6437 .mr(1)
6438 .nr(8)
6439 .kr(1)
6440 .sr(1)
6441 .m(1)
6442 .n(8)
6443 .k(2)
6444 .cn_stride(11)
6445 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6446 }
6447
6448 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile) {
6449 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006450 for (uint32_t n = 1; n <= 8; n++) {
6451 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006452 GemmMicrokernelTester()
6453 .mr(1)
6454 .nr(8)
6455 .kr(1)
6456 .sr(1)
6457 .m(m)
6458 .n(n)
6459 .k(2)
6460 .iterations(1)
6461 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6462 }
6463 }
6464 }
6465
6466 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
6467 TEST_REQUIRES_ARM_NEON;
6468 for (uint32_t m = 1; m <= 1; m++) {
6469 GemmMicrokernelTester()
6470 .mr(1)
6471 .nr(8)
6472 .kr(1)
6473 .sr(1)
6474 .m(m)
6475 .n(8)
6476 .k(2)
6477 .iterations(1)
6478 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6479 }
6480 }
6481
6482 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
6483 TEST_REQUIRES_ARM_NEON;
6484 for (uint32_t n = 1; n <= 8; n++) {
6485 GemmMicrokernelTester()
6486 .mr(1)
6487 .nr(8)
6488 .kr(1)
6489 .sr(1)
6490 .m(1)
6491 .n(n)
6492 .k(2)
6493 .iterations(1)
6494 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6495 }
6496 }
6497
6498 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_lt_2) {
6499 TEST_REQUIRES_ARM_NEON;
6500 for (size_t k = 1; k < 2; k++) {
6501 GemmMicrokernelTester()
6502 .mr(1)
6503 .nr(8)
6504 .kr(1)
6505 .sr(1)
6506 .m(1)
6507 .n(8)
6508 .k(k)
6509 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6510 }
6511 }
6512
6513 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_lt_2_subtile) {
6514 TEST_REQUIRES_ARM_NEON;
6515 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006516 for (uint32_t n = 1; n <= 8; n++) {
6517 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006518 GemmMicrokernelTester()
6519 .mr(1)
6520 .nr(8)
6521 .kr(1)
6522 .sr(1)
6523 .m(m)
6524 .n(n)
6525 .k(k)
6526 .iterations(1)
6527 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6528 }
6529 }
6530 }
6531 }
6532
6533 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_gt_2) {
6534 TEST_REQUIRES_ARM_NEON;
6535 for (size_t k = 3; k < 4; k++) {
6536 GemmMicrokernelTester()
6537 .mr(1)
6538 .nr(8)
6539 .kr(1)
6540 .sr(1)
6541 .m(1)
6542 .n(8)
6543 .k(k)
6544 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6545 }
6546 }
6547
6548 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_gt_2_subtile) {
6549 TEST_REQUIRES_ARM_NEON;
6550 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006551 for (uint32_t n = 1; n <= 8; n++) {
6552 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006553 GemmMicrokernelTester()
6554 .mr(1)
6555 .nr(8)
6556 .kr(1)
6557 .sr(1)
6558 .m(m)
6559 .n(n)
6560 .k(k)
6561 .iterations(1)
6562 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6563 }
6564 }
6565 }
6566 }
6567
6568 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_div_2) {
6569 TEST_REQUIRES_ARM_NEON;
6570 for (size_t k = 4; k <= 20; k += 2) {
6571 GemmMicrokernelTester()
6572 .mr(1)
6573 .nr(8)
6574 .kr(1)
6575 .sr(1)
6576 .m(1)
6577 .n(8)
6578 .k(k)
6579 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6580 }
6581 }
6582
6583 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, k_div_2_subtile) {
6584 TEST_REQUIRES_ARM_NEON;
6585 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006586 for (uint32_t n = 1; n <= 8; n++) {
6587 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006588 GemmMicrokernelTester()
6589 .mr(1)
6590 .nr(8)
6591 .kr(1)
6592 .sr(1)
6593 .m(m)
6594 .n(n)
6595 .k(k)
6596 .iterations(1)
6597 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6598 }
6599 }
6600 }
6601 }
6602
6603 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8) {
6604 TEST_REQUIRES_ARM_NEON;
6605 for (uint32_t n = 9; n < 16; n++) {
6606 for (size_t k = 1; k <= 10; k += 3) {
6607 GemmMicrokernelTester()
6608 .mr(1)
6609 .nr(8)
6610 .kr(1)
6611 .sr(1)
6612 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006613 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006614 .k(k)
6615 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6616 }
6617 }
6618 }
6619
6620 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
6621 TEST_REQUIRES_ARM_NEON;
6622 for (uint32_t n = 9; n < 16; n++) {
6623 for (size_t k = 1; k <= 10; k += 3) {
6624 GemmMicrokernelTester()
6625 .mr(1)
6626 .nr(8)
6627 .kr(1)
6628 .sr(1)
6629 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006630 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006631 .k(k)
6632 .cn_stride(11)
6633 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6634 }
6635 }
6636 }
6637
6638 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_subtile) {
6639 TEST_REQUIRES_ARM_NEON;
6640 for (uint32_t n = 9; n < 16; n++) {
6641 for (size_t k = 1; k <= 10; k += 3) {
6642 for (uint32_t m = 1; m <= 1; m++) {
6643 GemmMicrokernelTester()
6644 .mr(1)
6645 .nr(8)
6646 .kr(1)
6647 .sr(1)
6648 .m(m)
6649 .n(n)
6650 .k(k)
6651 .iterations(1)
6652 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6653 }
6654 }
6655 }
6656 }
6657
6658 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8) {
6659 TEST_REQUIRES_ARM_NEON;
6660 for (uint32_t n = 16; n <= 24; n += 8) {
6661 for (size_t k = 1; k <= 10; k += 3) {
6662 GemmMicrokernelTester()
6663 .mr(1)
6664 .nr(8)
6665 .kr(1)
6666 .sr(1)
6667 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006668 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006669 .k(k)
6670 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6671 }
6672 }
6673 }
6674
6675 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8_strided_cn) {
6676 TEST_REQUIRES_ARM_NEON;
6677 for (uint32_t n = 16; n <= 24; n += 8) {
6678 for (size_t k = 1; k <= 10; k += 3) {
6679 GemmMicrokernelTester()
6680 .mr(1)
6681 .nr(8)
6682 .kr(1)
6683 .sr(1)
6684 .m(1)
6685 .n(n)
6686 .k(k)
6687 .cn_stride(11)
6688 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6689 }
6690 }
6691 }
6692
6693 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8_subtile) {
6694 TEST_REQUIRES_ARM_NEON;
6695 for (uint32_t n = 16; n <= 24; n += 8) {
6696 for (size_t k = 1; k <= 10; k += 3) {
6697 for (uint32_t m = 1; m <= 1; m++) {
6698 GemmMicrokernelTester()
6699 .mr(1)
6700 .nr(8)
6701 .kr(1)
6702 .sr(1)
6703 .m(m)
6704 .n(n)
6705 .k(k)
6706 .iterations(1)
6707 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6708 }
6709 }
6710 }
6711 }
6712
6713 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, small_kernel) {
6714 TEST_REQUIRES_ARM_NEON;
6715 for (size_t k = 1; k <= 10; k += 3) {
6716 GemmMicrokernelTester()
6717 .mr(1)
6718 .nr(8)
6719 .kr(1)
6720 .sr(1)
6721 .m(1)
6722 .n(8)
6723 .k(k)
6724 .ks(3)
6725 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6726 }
6727 }
6728
6729 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, small_kernel_subtile) {
6730 TEST_REQUIRES_ARM_NEON;
6731 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006732 for (uint32_t n = 1; n <= 8; n++) {
6733 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006734 GemmMicrokernelTester()
6735 .mr(1)
6736 .nr(8)
6737 .kr(1)
6738 .sr(1)
6739 .m(m)
6740 .n(n)
6741 .k(k)
6742 .ks(3)
6743 .iterations(1)
6744 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6745 }
6746 }
6747 }
6748 }
6749
6750 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_gt_8_small_kernel) {
6751 TEST_REQUIRES_ARM_NEON;
6752 for (uint32_t n = 9; n < 16; n++) {
6753 for (size_t k = 1; k <= 10; k += 3) {
6754 GemmMicrokernelTester()
6755 .mr(1)
6756 .nr(8)
6757 .kr(1)
6758 .sr(1)
6759 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006760 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006761 .k(k)
6762 .ks(3)
6763 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6764 }
6765 }
6766 }
6767
6768 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, n_div_8_small_kernel) {
6769 TEST_REQUIRES_ARM_NEON;
6770 for (uint32_t n = 16; n <= 24; n += 8) {
6771 for (size_t k = 1; k <= 10; k += 3) {
6772 GemmMicrokernelTester()
6773 .mr(1)
6774 .nr(8)
6775 .kr(1)
6776 .sr(1)
6777 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006778 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006779 .k(k)
6780 .ks(3)
6781 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6782 }
6783 }
6784 }
6785
6786 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, strided_cm_subtile) {
6787 TEST_REQUIRES_ARM_NEON;
6788 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006789 for (uint32_t n = 1; n <= 8; n++) {
6790 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006791 GemmMicrokernelTester()
6792 .mr(1)
6793 .nr(8)
6794 .kr(1)
6795 .sr(1)
6796 .m(m)
6797 .n(n)
6798 .k(k)
6799 .cm_stride(11)
6800 .iterations(1)
6801 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6802 }
6803 }
6804 }
6805 }
6806
6807 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, a_offset) {
6808 TEST_REQUIRES_ARM_NEON;
6809 for (size_t k = 1; k <= 10; k += 3) {
6810 GemmMicrokernelTester()
6811 .mr(1)
6812 .nr(8)
6813 .kr(1)
6814 .sr(1)
6815 .m(1)
6816 .n(8)
6817 .k(k)
6818 .ks(3)
6819 .a_offset(13)
6820 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6821 }
6822 }
6823
6824 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, zero) {
6825 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006826 for (size_t k = 1; k <= 10; k += 3) {
6827 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006828 GemmMicrokernelTester()
6829 .mr(1)
6830 .nr(8)
6831 .kr(1)
6832 .sr(1)
6833 .m(1)
6834 .n(8)
6835 .k(k)
6836 .ks(3)
6837 .a_offset(13)
6838 .zero_index(mz)
6839 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6840 }
6841 }
6842 }
6843
6844 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, qmin) {
6845 TEST_REQUIRES_ARM_NEON;
6846 GemmMicrokernelTester()
6847 .mr(1)
6848 .nr(8)
6849 .kr(1)
6850 .sr(1)
6851 .m(1)
6852 .n(8)
6853 .k(2)
6854 .qmin(128)
6855 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6856 }
6857
6858 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, qmax) {
6859 TEST_REQUIRES_ARM_NEON;
6860 GemmMicrokernelTester()
6861 .mr(1)
6862 .nr(8)
6863 .kr(1)
6864 .sr(1)
6865 .m(1)
6866 .n(8)
6867 .k(2)
6868 .qmax(128)
6869 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6870 }
6871
6872 TEST(F32_IGEMM_MINMAX_1X8__NEON_LANE_LD64, strided_cm) {
6873 TEST_REQUIRES_ARM_NEON;
6874 GemmMicrokernelTester()
6875 .mr(1)
6876 .nr(8)
6877 .kr(1)
6878 .sr(1)
6879 .m(1)
6880 .n(8)
6881 .k(2)
6882 .cm_stride(11)
6883 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
6884 }
6885#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6886
6887
6888#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6889 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4) {
6890 TEST_REQUIRES_ARM_NEON;
6891 GemmMicrokernelTester()
6892 .mr(4)
6893 .nr(8)
6894 .kr(1)
6895 .sr(1)
6896 .m(4)
6897 .n(8)
6898 .k(4)
6899 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6900 }
6901
6902 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, strided_cn) {
6903 TEST_REQUIRES_ARM_NEON;
6904 GemmMicrokernelTester()
6905 .mr(4)
6906 .nr(8)
6907 .kr(1)
6908 .sr(1)
6909 .m(4)
6910 .n(8)
6911 .k(4)
6912 .cn_stride(11)
6913 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6914 }
6915
6916 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile) {
6917 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006918 for (uint32_t n = 1; n <= 8; n++) {
6919 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006920 GemmMicrokernelTester()
6921 .mr(4)
6922 .nr(8)
6923 .kr(1)
6924 .sr(1)
6925 .m(m)
6926 .n(n)
6927 .k(4)
6928 .iterations(1)
6929 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6930 }
6931 }
6932 }
6933
6934 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile_m) {
6935 TEST_REQUIRES_ARM_NEON;
6936 for (uint32_t m = 1; m <= 4; m++) {
6937 GemmMicrokernelTester()
6938 .mr(4)
6939 .nr(8)
6940 .kr(1)
6941 .sr(1)
6942 .m(m)
6943 .n(8)
6944 .k(4)
6945 .iterations(1)
6946 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6947 }
6948 }
6949
6950 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_eq_4_subtile_n) {
6951 TEST_REQUIRES_ARM_NEON;
6952 for (uint32_t n = 1; n <= 8; n++) {
6953 GemmMicrokernelTester()
6954 .mr(4)
6955 .nr(8)
6956 .kr(1)
6957 .sr(1)
6958 .m(4)
6959 .n(n)
6960 .k(4)
6961 .iterations(1)
6962 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6963 }
6964 }
6965
6966 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_lt_4) {
6967 TEST_REQUIRES_ARM_NEON;
6968 for (size_t k = 1; k < 4; k++) {
6969 GemmMicrokernelTester()
6970 .mr(4)
6971 .nr(8)
6972 .kr(1)
6973 .sr(1)
6974 .m(4)
6975 .n(8)
6976 .k(k)
6977 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6978 }
6979 }
6980
6981 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_lt_4_subtile) {
6982 TEST_REQUIRES_ARM_NEON;
6983 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006984 for (uint32_t n = 1; n <= 8; n++) {
6985 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006986 GemmMicrokernelTester()
6987 .mr(4)
6988 .nr(8)
6989 .kr(1)
6990 .sr(1)
6991 .m(m)
6992 .n(n)
6993 .k(k)
6994 .iterations(1)
6995 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
6996 }
6997 }
6998 }
6999 }
7000
7001 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_gt_4) {
7002 TEST_REQUIRES_ARM_NEON;
7003 for (size_t k = 5; k < 8; k++) {
7004 GemmMicrokernelTester()
7005 .mr(4)
7006 .nr(8)
7007 .kr(1)
7008 .sr(1)
7009 .m(4)
7010 .n(8)
7011 .k(k)
7012 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7013 }
7014 }
7015
7016 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_gt_4_subtile) {
7017 TEST_REQUIRES_ARM_NEON;
7018 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007019 for (uint32_t n = 1; n <= 8; n++) {
7020 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007021 GemmMicrokernelTester()
7022 .mr(4)
7023 .nr(8)
7024 .kr(1)
7025 .sr(1)
7026 .m(m)
7027 .n(n)
7028 .k(k)
7029 .iterations(1)
7030 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7031 }
7032 }
7033 }
7034 }
7035
7036 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_div_4) {
7037 TEST_REQUIRES_ARM_NEON;
7038 for (size_t k = 8; k <= 40; k += 4) {
7039 GemmMicrokernelTester()
7040 .mr(4)
7041 .nr(8)
7042 .kr(1)
7043 .sr(1)
7044 .m(4)
7045 .n(8)
7046 .k(k)
7047 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7048 }
7049 }
7050
7051 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, k_div_4_subtile) {
7052 TEST_REQUIRES_ARM_NEON;
7053 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007054 for (uint32_t n = 1; n <= 8; n++) {
7055 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007056 GemmMicrokernelTester()
7057 .mr(4)
7058 .nr(8)
7059 .kr(1)
7060 .sr(1)
7061 .m(m)
7062 .n(n)
7063 .k(k)
7064 .iterations(1)
7065 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7066 }
7067 }
7068 }
7069 }
7070
7071 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8) {
7072 TEST_REQUIRES_ARM_NEON;
7073 for (uint32_t n = 9; n < 16; n++) {
7074 for (size_t k = 1; k <= 20; k += 5) {
7075 GemmMicrokernelTester()
7076 .mr(4)
7077 .nr(8)
7078 .kr(1)
7079 .sr(1)
7080 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007081 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007082 .k(k)
7083 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7084 }
7085 }
7086 }
7087
7088 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_strided_cn) {
7089 TEST_REQUIRES_ARM_NEON;
7090 for (uint32_t n = 9; n < 16; n++) {
7091 for (size_t k = 1; k <= 20; k += 5) {
7092 GemmMicrokernelTester()
7093 .mr(4)
7094 .nr(8)
7095 .kr(1)
7096 .sr(1)
7097 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007098 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007099 .k(k)
7100 .cn_stride(11)
7101 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7102 }
7103 }
7104 }
7105
7106 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_subtile) {
7107 TEST_REQUIRES_ARM_NEON;
7108 for (uint32_t n = 9; n < 16; n++) {
7109 for (size_t k = 1; k <= 20; k += 5) {
7110 for (uint32_t m = 1; m <= 4; m++) {
7111 GemmMicrokernelTester()
7112 .mr(4)
7113 .nr(8)
7114 .kr(1)
7115 .sr(1)
7116 .m(m)
7117 .n(n)
7118 .k(k)
7119 .iterations(1)
7120 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7121 }
7122 }
7123 }
7124 }
7125
7126 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8) {
7127 TEST_REQUIRES_ARM_NEON;
7128 for (uint32_t n = 16; n <= 24; n += 8) {
7129 for (size_t k = 1; k <= 20; k += 5) {
7130 GemmMicrokernelTester()
7131 .mr(4)
7132 .nr(8)
7133 .kr(1)
7134 .sr(1)
7135 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007136 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007137 .k(k)
7138 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7139 }
7140 }
7141 }
7142
7143 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8_strided_cn) {
7144 TEST_REQUIRES_ARM_NEON;
7145 for (uint32_t n = 16; n <= 24; n += 8) {
7146 for (size_t k = 1; k <= 20; k += 5) {
7147 GemmMicrokernelTester()
7148 .mr(4)
7149 .nr(8)
7150 .kr(1)
7151 .sr(1)
7152 .m(4)
7153 .n(n)
7154 .k(k)
7155 .cn_stride(11)
7156 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7157 }
7158 }
7159 }
7160
7161 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8_subtile) {
7162 TEST_REQUIRES_ARM_NEON;
7163 for (uint32_t n = 16; n <= 24; n += 8) {
7164 for (size_t k = 1; k <= 20; k += 5) {
7165 for (uint32_t m = 1; m <= 4; m++) {
7166 GemmMicrokernelTester()
7167 .mr(4)
7168 .nr(8)
7169 .kr(1)
7170 .sr(1)
7171 .m(m)
7172 .n(n)
7173 .k(k)
7174 .iterations(1)
7175 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7176 }
7177 }
7178 }
7179 }
7180
7181 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, small_kernel) {
7182 TEST_REQUIRES_ARM_NEON;
7183 for (size_t k = 1; k <= 20; k += 5) {
7184 GemmMicrokernelTester()
7185 .mr(4)
7186 .nr(8)
7187 .kr(1)
7188 .sr(1)
7189 .m(4)
7190 .n(8)
7191 .k(k)
7192 .ks(3)
7193 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7194 }
7195 }
7196
7197 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, small_kernel_subtile) {
7198 TEST_REQUIRES_ARM_NEON;
7199 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007200 for (uint32_t n = 1; n <= 8; n++) {
7201 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007202 GemmMicrokernelTester()
7203 .mr(4)
7204 .nr(8)
7205 .kr(1)
7206 .sr(1)
7207 .m(m)
7208 .n(n)
7209 .k(k)
7210 .ks(3)
7211 .iterations(1)
7212 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7213 }
7214 }
7215 }
7216 }
7217
7218 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_gt_8_small_kernel) {
7219 TEST_REQUIRES_ARM_NEON;
7220 for (uint32_t n = 9; n < 16; n++) {
7221 for (size_t k = 1; k <= 20; k += 5) {
7222 GemmMicrokernelTester()
7223 .mr(4)
7224 .nr(8)
7225 .kr(1)
7226 .sr(1)
7227 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007228 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007229 .k(k)
7230 .ks(3)
7231 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7232 }
7233 }
7234 }
7235
7236 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, n_div_8_small_kernel) {
7237 TEST_REQUIRES_ARM_NEON;
7238 for (uint32_t n = 16; n <= 24; n += 8) {
7239 for (size_t k = 1; k <= 20; k += 5) {
7240 GemmMicrokernelTester()
7241 .mr(4)
7242 .nr(8)
7243 .kr(1)
7244 .sr(1)
7245 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007246 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007247 .k(k)
7248 .ks(3)
7249 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7250 }
7251 }
7252 }
7253
7254 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, strided_cm_subtile) {
7255 TEST_REQUIRES_ARM_NEON;
7256 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007257 for (uint32_t n = 1; n <= 8; n++) {
7258 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007259 GemmMicrokernelTester()
7260 .mr(4)
7261 .nr(8)
7262 .kr(1)
7263 .sr(1)
7264 .m(m)
7265 .n(n)
7266 .k(k)
7267 .cm_stride(11)
7268 .iterations(1)
7269 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7270 }
7271 }
7272 }
7273 }
7274
7275 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, a_offset) {
7276 TEST_REQUIRES_ARM_NEON;
7277 for (size_t k = 1; k <= 20; k += 5) {
7278 GemmMicrokernelTester()
7279 .mr(4)
7280 .nr(8)
7281 .kr(1)
7282 .sr(1)
7283 .m(4)
7284 .n(8)
7285 .k(k)
7286 .ks(3)
7287 .a_offset(83)
7288 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7289 }
7290 }
7291
7292 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, zero) {
7293 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007294 for (size_t k = 1; k <= 20; k += 5) {
7295 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007296 GemmMicrokernelTester()
7297 .mr(4)
7298 .nr(8)
7299 .kr(1)
7300 .sr(1)
7301 .m(4)
7302 .n(8)
7303 .k(k)
7304 .ks(3)
7305 .a_offset(83)
7306 .zero_index(mz)
7307 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7308 }
7309 }
7310 }
7311
7312 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, qmin) {
7313 TEST_REQUIRES_ARM_NEON;
7314 GemmMicrokernelTester()
7315 .mr(4)
7316 .nr(8)
7317 .kr(1)
7318 .sr(1)
7319 .m(4)
7320 .n(8)
7321 .k(4)
7322 .qmin(128)
7323 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7324 }
7325
7326 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, qmax) {
7327 TEST_REQUIRES_ARM_NEON;
7328 GemmMicrokernelTester()
7329 .mr(4)
7330 .nr(8)
7331 .kr(1)
7332 .sr(1)
7333 .m(4)
7334 .n(8)
7335 .k(4)
7336 .qmax(128)
7337 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7338 }
7339
7340 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD128, strided_cm) {
7341 TEST_REQUIRES_ARM_NEON;
7342 GemmMicrokernelTester()
7343 .mr(4)
7344 .nr(8)
7345 .kr(1)
7346 .sr(1)
7347 .m(4)
7348 .n(8)
7349 .k(4)
7350 .cm_stride(11)
7351 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7352 }
7353#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7354
7355
7356#if XNN_ARCH_ARM || XNN_ARCH_ARM64
7357 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2) {
7358 TEST_REQUIRES_ARM_NEON;
7359 GemmMicrokernelTester()
7360 .mr(4)
7361 .nr(8)
7362 .kr(1)
7363 .sr(1)
7364 .m(4)
7365 .n(8)
7366 .k(2)
7367 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7368 }
7369
7370 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, strided_cn) {
7371 TEST_REQUIRES_ARM_NEON;
7372 GemmMicrokernelTester()
7373 .mr(4)
7374 .nr(8)
7375 .kr(1)
7376 .sr(1)
7377 .m(4)
7378 .n(8)
7379 .k(2)
7380 .cn_stride(11)
7381 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7382 }
7383
7384 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile) {
7385 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007386 for (uint32_t n = 1; n <= 8; n++) {
7387 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007388 GemmMicrokernelTester()
7389 .mr(4)
7390 .nr(8)
7391 .kr(1)
7392 .sr(1)
7393 .m(m)
7394 .n(n)
7395 .k(2)
7396 .iterations(1)
7397 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7398 }
7399 }
7400 }
7401
7402 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile_m) {
7403 TEST_REQUIRES_ARM_NEON;
7404 for (uint32_t m = 1; m <= 4; m++) {
7405 GemmMicrokernelTester()
7406 .mr(4)
7407 .nr(8)
7408 .kr(1)
7409 .sr(1)
7410 .m(m)
7411 .n(8)
7412 .k(2)
7413 .iterations(1)
7414 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7415 }
7416 }
7417
7418 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_eq_2_subtile_n) {
7419 TEST_REQUIRES_ARM_NEON;
7420 for (uint32_t n = 1; n <= 8; n++) {
7421 GemmMicrokernelTester()
7422 .mr(4)
7423 .nr(8)
7424 .kr(1)
7425 .sr(1)
7426 .m(4)
7427 .n(n)
7428 .k(2)
7429 .iterations(1)
7430 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7431 }
7432 }
7433
7434 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_lt_2) {
7435 TEST_REQUIRES_ARM_NEON;
7436 for (size_t k = 1; k < 2; k++) {
7437 GemmMicrokernelTester()
7438 .mr(4)
7439 .nr(8)
7440 .kr(1)
7441 .sr(1)
7442 .m(4)
7443 .n(8)
7444 .k(k)
7445 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7446 }
7447 }
7448
7449 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_lt_2_subtile) {
7450 TEST_REQUIRES_ARM_NEON;
7451 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007452 for (uint32_t n = 1; n <= 8; n++) {
7453 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007454 GemmMicrokernelTester()
7455 .mr(4)
7456 .nr(8)
7457 .kr(1)
7458 .sr(1)
7459 .m(m)
7460 .n(n)
7461 .k(k)
7462 .iterations(1)
7463 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7464 }
7465 }
7466 }
7467 }
7468
7469 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_gt_2) {
7470 TEST_REQUIRES_ARM_NEON;
7471 for (size_t k = 3; k < 4; k++) {
7472 GemmMicrokernelTester()
7473 .mr(4)
7474 .nr(8)
7475 .kr(1)
7476 .sr(1)
7477 .m(4)
7478 .n(8)
7479 .k(k)
7480 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7481 }
7482 }
7483
7484 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_gt_2_subtile) {
7485 TEST_REQUIRES_ARM_NEON;
7486 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007487 for (uint32_t n = 1; n <= 8; n++) {
7488 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007489 GemmMicrokernelTester()
7490 .mr(4)
7491 .nr(8)
7492 .kr(1)
7493 .sr(1)
7494 .m(m)
7495 .n(n)
7496 .k(k)
7497 .iterations(1)
7498 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7499 }
7500 }
7501 }
7502 }
7503
7504 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_div_2) {
7505 TEST_REQUIRES_ARM_NEON;
7506 for (size_t k = 4; k <= 20; k += 2) {
7507 GemmMicrokernelTester()
7508 .mr(4)
7509 .nr(8)
7510 .kr(1)
7511 .sr(1)
7512 .m(4)
7513 .n(8)
7514 .k(k)
7515 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7516 }
7517 }
7518
7519 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, k_div_2_subtile) {
7520 TEST_REQUIRES_ARM_NEON;
7521 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007522 for (uint32_t n = 1; n <= 8; n++) {
7523 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007524 GemmMicrokernelTester()
7525 .mr(4)
7526 .nr(8)
7527 .kr(1)
7528 .sr(1)
7529 .m(m)
7530 .n(n)
7531 .k(k)
7532 .iterations(1)
7533 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7534 }
7535 }
7536 }
7537 }
7538
7539 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8) {
7540 TEST_REQUIRES_ARM_NEON;
7541 for (uint32_t n = 9; n < 16; n++) {
7542 for (size_t k = 1; k <= 10; k += 3) {
7543 GemmMicrokernelTester()
7544 .mr(4)
7545 .nr(8)
7546 .kr(1)
7547 .sr(1)
7548 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007549 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007550 .k(k)
7551 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7552 }
7553 }
7554 }
7555
7556 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_strided_cn) {
7557 TEST_REQUIRES_ARM_NEON;
7558 for (uint32_t n = 9; n < 16; n++) {
7559 for (size_t k = 1; k <= 10; k += 3) {
7560 GemmMicrokernelTester()
7561 .mr(4)
7562 .nr(8)
7563 .kr(1)
7564 .sr(1)
7565 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007566 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007567 .k(k)
7568 .cn_stride(11)
7569 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7570 }
7571 }
7572 }
7573
7574 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_subtile) {
7575 TEST_REQUIRES_ARM_NEON;
7576 for (uint32_t n = 9; n < 16; n++) {
7577 for (size_t k = 1; k <= 10; k += 3) {
7578 for (uint32_t m = 1; m <= 4; m++) {
7579 GemmMicrokernelTester()
7580 .mr(4)
7581 .nr(8)
7582 .kr(1)
7583 .sr(1)
7584 .m(m)
7585 .n(n)
7586 .k(k)
7587 .iterations(1)
7588 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7589 }
7590 }
7591 }
7592 }
7593
7594 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8) {
7595 TEST_REQUIRES_ARM_NEON;
7596 for (uint32_t n = 16; n <= 24; n += 8) {
7597 for (size_t k = 1; k <= 10; k += 3) {
7598 GemmMicrokernelTester()
7599 .mr(4)
7600 .nr(8)
7601 .kr(1)
7602 .sr(1)
7603 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007604 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007605 .k(k)
7606 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7607 }
7608 }
7609 }
7610
7611 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8_strided_cn) {
7612 TEST_REQUIRES_ARM_NEON;
7613 for (uint32_t n = 16; n <= 24; n += 8) {
7614 for (size_t k = 1; k <= 10; k += 3) {
7615 GemmMicrokernelTester()
7616 .mr(4)
7617 .nr(8)
7618 .kr(1)
7619 .sr(1)
7620 .m(4)
7621 .n(n)
7622 .k(k)
7623 .cn_stride(11)
7624 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7625 }
7626 }
7627 }
7628
7629 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8_subtile) {
7630 TEST_REQUIRES_ARM_NEON;
7631 for (uint32_t n = 16; n <= 24; n += 8) {
7632 for (size_t k = 1; k <= 10; k += 3) {
7633 for (uint32_t m = 1; m <= 4; m++) {
7634 GemmMicrokernelTester()
7635 .mr(4)
7636 .nr(8)
7637 .kr(1)
7638 .sr(1)
7639 .m(m)
7640 .n(n)
7641 .k(k)
7642 .iterations(1)
7643 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7644 }
7645 }
7646 }
7647 }
7648
7649 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, small_kernel) {
7650 TEST_REQUIRES_ARM_NEON;
7651 for (size_t k = 1; k <= 10; k += 3) {
7652 GemmMicrokernelTester()
7653 .mr(4)
7654 .nr(8)
7655 .kr(1)
7656 .sr(1)
7657 .m(4)
7658 .n(8)
7659 .k(k)
7660 .ks(3)
7661 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7662 }
7663 }
7664
7665 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, small_kernel_subtile) {
7666 TEST_REQUIRES_ARM_NEON;
7667 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007668 for (uint32_t n = 1; n <= 8; n++) {
7669 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007670 GemmMicrokernelTester()
7671 .mr(4)
7672 .nr(8)
7673 .kr(1)
7674 .sr(1)
7675 .m(m)
7676 .n(n)
7677 .k(k)
7678 .ks(3)
7679 .iterations(1)
7680 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7681 }
7682 }
7683 }
7684 }
7685
7686 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_gt_8_small_kernel) {
7687 TEST_REQUIRES_ARM_NEON;
7688 for (uint32_t n = 9; n < 16; n++) {
7689 for (size_t k = 1; k <= 10; k += 3) {
7690 GemmMicrokernelTester()
7691 .mr(4)
7692 .nr(8)
7693 .kr(1)
7694 .sr(1)
7695 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007696 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007697 .k(k)
7698 .ks(3)
7699 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7700 }
7701 }
7702 }
7703
7704 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, n_div_8_small_kernel) {
7705 TEST_REQUIRES_ARM_NEON;
7706 for (uint32_t n = 16; n <= 24; n += 8) {
7707 for (size_t k = 1; k <= 10; k += 3) {
7708 GemmMicrokernelTester()
7709 .mr(4)
7710 .nr(8)
7711 .kr(1)
7712 .sr(1)
7713 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007714 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007715 .k(k)
7716 .ks(3)
7717 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7718 }
7719 }
7720 }
7721
7722 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, strided_cm_subtile) {
7723 TEST_REQUIRES_ARM_NEON;
7724 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007725 for (uint32_t n = 1; n <= 8; n++) {
7726 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007727 GemmMicrokernelTester()
7728 .mr(4)
7729 .nr(8)
7730 .kr(1)
7731 .sr(1)
7732 .m(m)
7733 .n(n)
7734 .k(k)
7735 .cm_stride(11)
7736 .iterations(1)
7737 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7738 }
7739 }
7740 }
7741 }
7742
7743 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, a_offset) {
7744 TEST_REQUIRES_ARM_NEON;
7745 for (size_t k = 1; k <= 10; k += 3) {
7746 GemmMicrokernelTester()
7747 .mr(4)
7748 .nr(8)
7749 .kr(1)
7750 .sr(1)
7751 .m(4)
7752 .n(8)
7753 .k(k)
7754 .ks(3)
7755 .a_offset(43)
7756 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7757 }
7758 }
7759
7760 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, zero) {
7761 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007762 for (size_t k = 1; k <= 10; k += 3) {
7763 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007764 GemmMicrokernelTester()
7765 .mr(4)
7766 .nr(8)
7767 .kr(1)
7768 .sr(1)
7769 .m(4)
7770 .n(8)
7771 .k(k)
7772 .ks(3)
7773 .a_offset(43)
7774 .zero_index(mz)
7775 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7776 }
7777 }
7778 }
7779
7780 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, qmin) {
7781 TEST_REQUIRES_ARM_NEON;
7782 GemmMicrokernelTester()
7783 .mr(4)
7784 .nr(8)
7785 .kr(1)
7786 .sr(1)
7787 .m(4)
7788 .n(8)
7789 .k(2)
7790 .qmin(128)
7791 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7792 }
7793
7794 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, qmax) {
7795 TEST_REQUIRES_ARM_NEON;
7796 GemmMicrokernelTester()
7797 .mr(4)
7798 .nr(8)
7799 .kr(1)
7800 .sr(1)
7801 .m(4)
7802 .n(8)
7803 .k(2)
7804 .qmax(128)
7805 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7806 }
7807
7808 TEST(F32_IGEMM_MINMAX_4X8__NEON_LANE_LD64, strided_cm) {
7809 TEST_REQUIRES_ARM_NEON;
7810 GemmMicrokernelTester()
7811 .mr(4)
7812 .nr(8)
7813 .kr(1)
7814 .sr(1)
7815 .m(4)
7816 .n(8)
7817 .k(2)
7818 .cm_stride(11)
7819 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64, xnn_init_f32_minmax_scalar_params);
7820 }
7821#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7822
7823
7824#if XNN_ARCH_ARM || XNN_ARCH_ARM64
7825 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4) {
7826 TEST_REQUIRES_ARM_NEON;
7827 GemmMicrokernelTester()
7828 .mr(6)
7829 .nr(8)
7830 .kr(1)
7831 .sr(1)
7832 .m(6)
7833 .n(8)
7834 .k(4)
7835 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7836 }
7837
7838 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, strided_cn) {
7839 TEST_REQUIRES_ARM_NEON;
7840 GemmMicrokernelTester()
7841 .mr(6)
7842 .nr(8)
7843 .kr(1)
7844 .sr(1)
7845 .m(6)
7846 .n(8)
7847 .k(4)
7848 .cn_stride(11)
7849 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7850 }
7851
7852 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile) {
7853 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007854 for (uint32_t n = 1; n <= 8; n++) {
7855 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007856 GemmMicrokernelTester()
7857 .mr(6)
7858 .nr(8)
7859 .kr(1)
7860 .sr(1)
7861 .m(m)
7862 .n(n)
7863 .k(4)
7864 .iterations(1)
7865 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7866 }
7867 }
7868 }
7869
7870 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile_m) {
7871 TEST_REQUIRES_ARM_NEON;
7872 for (uint32_t m = 1; m <= 6; m++) {
7873 GemmMicrokernelTester()
7874 .mr(6)
7875 .nr(8)
7876 .kr(1)
7877 .sr(1)
7878 .m(m)
7879 .n(8)
7880 .k(4)
7881 .iterations(1)
7882 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7883 }
7884 }
7885
7886 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_eq_4_subtile_n) {
7887 TEST_REQUIRES_ARM_NEON;
7888 for (uint32_t n = 1; n <= 8; n++) {
7889 GemmMicrokernelTester()
7890 .mr(6)
7891 .nr(8)
7892 .kr(1)
7893 .sr(1)
7894 .m(6)
7895 .n(n)
7896 .k(4)
7897 .iterations(1)
7898 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7899 }
7900 }
7901
7902 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_lt_4) {
7903 TEST_REQUIRES_ARM_NEON;
7904 for (size_t k = 1; k < 4; k++) {
7905 GemmMicrokernelTester()
7906 .mr(6)
7907 .nr(8)
7908 .kr(1)
7909 .sr(1)
7910 .m(6)
7911 .n(8)
7912 .k(k)
7913 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7914 }
7915 }
7916
7917 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_lt_4_subtile) {
7918 TEST_REQUIRES_ARM_NEON;
7919 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007920 for (uint32_t n = 1; n <= 8; n++) {
7921 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007922 GemmMicrokernelTester()
7923 .mr(6)
7924 .nr(8)
7925 .kr(1)
7926 .sr(1)
7927 .m(m)
7928 .n(n)
7929 .k(k)
7930 .iterations(1)
7931 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7932 }
7933 }
7934 }
7935 }
7936
7937 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_gt_4) {
7938 TEST_REQUIRES_ARM_NEON;
7939 for (size_t k = 5; k < 8; k++) {
7940 GemmMicrokernelTester()
7941 .mr(6)
7942 .nr(8)
7943 .kr(1)
7944 .sr(1)
7945 .m(6)
7946 .n(8)
7947 .k(k)
7948 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7949 }
7950 }
7951
7952 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_gt_4_subtile) {
7953 TEST_REQUIRES_ARM_NEON;
7954 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007955 for (uint32_t n = 1; n <= 8; n++) {
7956 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007957 GemmMicrokernelTester()
7958 .mr(6)
7959 .nr(8)
7960 .kr(1)
7961 .sr(1)
7962 .m(m)
7963 .n(n)
7964 .k(k)
7965 .iterations(1)
7966 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7967 }
7968 }
7969 }
7970 }
7971
7972 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_div_4) {
7973 TEST_REQUIRES_ARM_NEON;
7974 for (size_t k = 8; k <= 40; k += 4) {
7975 GemmMicrokernelTester()
7976 .mr(6)
7977 .nr(8)
7978 .kr(1)
7979 .sr(1)
7980 .m(6)
7981 .n(8)
7982 .k(k)
7983 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
7984 }
7985 }
7986
7987 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, k_div_4_subtile) {
7988 TEST_REQUIRES_ARM_NEON;
7989 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007990 for (uint32_t n = 1; n <= 8; n++) {
7991 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007992 GemmMicrokernelTester()
7993 .mr(6)
7994 .nr(8)
7995 .kr(1)
7996 .sr(1)
7997 .m(m)
7998 .n(n)
7999 .k(k)
8000 .iterations(1)
8001 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8002 }
8003 }
8004 }
8005 }
8006
8007 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8) {
8008 TEST_REQUIRES_ARM_NEON;
8009 for (uint32_t n = 9; n < 16; n++) {
8010 for (size_t k = 1; k <= 20; k += 5) {
8011 GemmMicrokernelTester()
8012 .mr(6)
8013 .nr(8)
8014 .kr(1)
8015 .sr(1)
8016 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008017 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008018 .k(k)
8019 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8020 }
8021 }
8022 }
8023
8024 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_strided_cn) {
8025 TEST_REQUIRES_ARM_NEON;
8026 for (uint32_t n = 9; n < 16; n++) {
8027 for (size_t k = 1; k <= 20; k += 5) {
8028 GemmMicrokernelTester()
8029 .mr(6)
8030 .nr(8)
8031 .kr(1)
8032 .sr(1)
8033 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008034 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008035 .k(k)
8036 .cn_stride(11)
8037 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8038 }
8039 }
8040 }
8041
8042 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_subtile) {
8043 TEST_REQUIRES_ARM_NEON;
8044 for (uint32_t n = 9; n < 16; n++) {
8045 for (size_t k = 1; k <= 20; k += 5) {
8046 for (uint32_t m = 1; m <= 6; m++) {
8047 GemmMicrokernelTester()
8048 .mr(6)
8049 .nr(8)
8050 .kr(1)
8051 .sr(1)
8052 .m(m)
8053 .n(n)
8054 .k(k)
8055 .iterations(1)
8056 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8057 }
8058 }
8059 }
8060 }
8061
8062 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8) {
8063 TEST_REQUIRES_ARM_NEON;
8064 for (uint32_t n = 16; n <= 24; n += 8) {
8065 for (size_t k = 1; k <= 20; k += 5) {
8066 GemmMicrokernelTester()
8067 .mr(6)
8068 .nr(8)
8069 .kr(1)
8070 .sr(1)
8071 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008072 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008073 .k(k)
8074 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8075 }
8076 }
8077 }
8078
8079 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8_strided_cn) {
8080 TEST_REQUIRES_ARM_NEON;
8081 for (uint32_t n = 16; n <= 24; n += 8) {
8082 for (size_t k = 1; k <= 20; k += 5) {
8083 GemmMicrokernelTester()
8084 .mr(6)
8085 .nr(8)
8086 .kr(1)
8087 .sr(1)
8088 .m(6)
8089 .n(n)
8090 .k(k)
8091 .cn_stride(11)
8092 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8093 }
8094 }
8095 }
8096
8097 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8_subtile) {
8098 TEST_REQUIRES_ARM_NEON;
8099 for (uint32_t n = 16; n <= 24; n += 8) {
8100 for (size_t k = 1; k <= 20; k += 5) {
8101 for (uint32_t m = 1; m <= 6; m++) {
8102 GemmMicrokernelTester()
8103 .mr(6)
8104 .nr(8)
8105 .kr(1)
8106 .sr(1)
8107 .m(m)
8108 .n(n)
8109 .k(k)
8110 .iterations(1)
8111 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8112 }
8113 }
8114 }
8115 }
8116
8117 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, small_kernel) {
8118 TEST_REQUIRES_ARM_NEON;
8119 for (size_t k = 1; k <= 20; k += 5) {
8120 GemmMicrokernelTester()
8121 .mr(6)
8122 .nr(8)
8123 .kr(1)
8124 .sr(1)
8125 .m(6)
8126 .n(8)
8127 .k(k)
8128 .ks(3)
8129 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8130 }
8131 }
8132
8133 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, small_kernel_subtile) {
8134 TEST_REQUIRES_ARM_NEON;
8135 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008136 for (uint32_t n = 1; n <= 8; n++) {
8137 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008138 GemmMicrokernelTester()
8139 .mr(6)
8140 .nr(8)
8141 .kr(1)
8142 .sr(1)
8143 .m(m)
8144 .n(n)
8145 .k(k)
8146 .ks(3)
8147 .iterations(1)
8148 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8149 }
8150 }
8151 }
8152 }
8153
8154 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_gt_8_small_kernel) {
8155 TEST_REQUIRES_ARM_NEON;
8156 for (uint32_t n = 9; n < 16; n++) {
8157 for (size_t k = 1; k <= 20; k += 5) {
8158 GemmMicrokernelTester()
8159 .mr(6)
8160 .nr(8)
8161 .kr(1)
8162 .sr(1)
8163 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008164 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008165 .k(k)
8166 .ks(3)
8167 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8168 }
8169 }
8170 }
8171
8172 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, n_div_8_small_kernel) {
8173 TEST_REQUIRES_ARM_NEON;
8174 for (uint32_t n = 16; n <= 24; n += 8) {
8175 for (size_t k = 1; k <= 20; k += 5) {
8176 GemmMicrokernelTester()
8177 .mr(6)
8178 .nr(8)
8179 .kr(1)
8180 .sr(1)
8181 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008182 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008183 .k(k)
8184 .ks(3)
8185 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8186 }
8187 }
8188 }
8189
8190 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, strided_cm_subtile) {
8191 TEST_REQUIRES_ARM_NEON;
8192 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008193 for (uint32_t n = 1; n <= 8; n++) {
8194 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008195 GemmMicrokernelTester()
8196 .mr(6)
8197 .nr(8)
8198 .kr(1)
8199 .sr(1)
8200 .m(m)
8201 .n(n)
8202 .k(k)
8203 .cm_stride(11)
8204 .iterations(1)
8205 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8206 }
8207 }
8208 }
8209 }
8210
8211 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, a_offset) {
8212 TEST_REQUIRES_ARM_NEON;
8213 for (size_t k = 1; k <= 20; k += 5) {
8214 GemmMicrokernelTester()
8215 .mr(6)
8216 .nr(8)
8217 .kr(1)
8218 .sr(1)
8219 .m(6)
8220 .n(8)
8221 .k(k)
8222 .ks(3)
8223 .a_offset(127)
8224 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8225 }
8226 }
8227
8228 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, zero) {
8229 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008230 for (size_t k = 1; k <= 20; k += 5) {
8231 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008232 GemmMicrokernelTester()
8233 .mr(6)
8234 .nr(8)
8235 .kr(1)
8236 .sr(1)
8237 .m(6)
8238 .n(8)
8239 .k(k)
8240 .ks(3)
8241 .a_offset(127)
8242 .zero_index(mz)
8243 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8244 }
8245 }
8246 }
8247
8248 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, qmin) {
8249 TEST_REQUIRES_ARM_NEON;
8250 GemmMicrokernelTester()
8251 .mr(6)
8252 .nr(8)
8253 .kr(1)
8254 .sr(1)
8255 .m(6)
8256 .n(8)
8257 .k(4)
8258 .qmin(128)
8259 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8260 }
8261
8262 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, qmax) {
8263 TEST_REQUIRES_ARM_NEON;
8264 GemmMicrokernelTester()
8265 .mr(6)
8266 .nr(8)
8267 .kr(1)
8268 .sr(1)
8269 .m(6)
8270 .n(8)
8271 .k(4)
8272 .qmax(128)
8273 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8274 }
8275
8276 TEST(F32_IGEMM_MINMAX_6X8__NEON_LANE_LD128, strided_cm) {
8277 TEST_REQUIRES_ARM_NEON;
8278 GemmMicrokernelTester()
8279 .mr(6)
8280 .nr(8)
8281 .kr(1)
8282 .sr(1)
8283 .m(6)
8284 .n(8)
8285 .k(4)
8286 .cm_stride(11)
8287 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128, xnn_init_f32_minmax_scalar_params);
8288 }
8289#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8290
8291
8292#if XNN_ARCH_ARM64
8293 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2) {
8294 TEST_REQUIRES_ARM_NEON_FMA;
8295 GemmMicrokernelTester()
8296 .mr(1)
8297 .nr(8)
8298 .kr(1)
8299 .sr(1)
8300 .m(1)
8301 .n(8)
8302 .k(2)
8303 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8304 }
8305
8306 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cn) {
8307 TEST_REQUIRES_ARM_NEON_FMA;
8308 GemmMicrokernelTester()
8309 .mr(1)
8310 .nr(8)
8311 .kr(1)
8312 .sr(1)
8313 .m(1)
8314 .n(8)
8315 .k(2)
8316 .cn_stride(11)
8317 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8318 }
8319
8320 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
8321 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008322 for (uint32_t n = 1; n <= 8; n++) {
8323 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008324 GemmMicrokernelTester()
8325 .mr(1)
8326 .nr(8)
8327 .kr(1)
8328 .sr(1)
8329 .m(m)
8330 .n(n)
8331 .k(2)
8332 .iterations(1)
8333 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8334 }
8335 }
8336 }
8337
8338 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
8339 TEST_REQUIRES_ARM_NEON_FMA;
8340 for (uint32_t m = 1; m <= 1; m++) {
8341 GemmMicrokernelTester()
8342 .mr(1)
8343 .nr(8)
8344 .kr(1)
8345 .sr(1)
8346 .m(m)
8347 .n(8)
8348 .k(2)
8349 .iterations(1)
8350 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8351 }
8352 }
8353
8354 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
8355 TEST_REQUIRES_ARM_NEON_FMA;
8356 for (uint32_t n = 1; n <= 8; n++) {
8357 GemmMicrokernelTester()
8358 .mr(1)
8359 .nr(8)
8360 .kr(1)
8361 .sr(1)
8362 .m(1)
8363 .n(n)
8364 .k(2)
8365 .iterations(1)
8366 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8367 }
8368 }
8369
8370 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_lt_2) {
8371 TEST_REQUIRES_ARM_NEON_FMA;
8372 for (size_t k = 1; k < 2; k++) {
8373 GemmMicrokernelTester()
8374 .mr(1)
8375 .nr(8)
8376 .kr(1)
8377 .sr(1)
8378 .m(1)
8379 .n(8)
8380 .k(k)
8381 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8382 }
8383 }
8384
8385 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
8386 TEST_REQUIRES_ARM_NEON_FMA;
8387 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008388 for (uint32_t n = 1; n <= 8; n++) {
8389 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008390 GemmMicrokernelTester()
8391 .mr(1)
8392 .nr(8)
8393 .kr(1)
8394 .sr(1)
8395 .m(m)
8396 .n(n)
8397 .k(k)
8398 .iterations(1)
8399 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8400 }
8401 }
8402 }
8403 }
8404
8405 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_gt_2) {
8406 TEST_REQUIRES_ARM_NEON_FMA;
8407 for (size_t k = 3; k < 4; k++) {
8408 GemmMicrokernelTester()
8409 .mr(1)
8410 .nr(8)
8411 .kr(1)
8412 .sr(1)
8413 .m(1)
8414 .n(8)
8415 .k(k)
8416 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8417 }
8418 }
8419
8420 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
8421 TEST_REQUIRES_ARM_NEON_FMA;
8422 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008423 for (uint32_t n = 1; n <= 8; n++) {
8424 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008425 GemmMicrokernelTester()
8426 .mr(1)
8427 .nr(8)
8428 .kr(1)
8429 .sr(1)
8430 .m(m)
8431 .n(n)
8432 .k(k)
8433 .iterations(1)
8434 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8435 }
8436 }
8437 }
8438 }
8439
8440 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_div_2) {
8441 TEST_REQUIRES_ARM_NEON_FMA;
8442 for (size_t k = 4; k <= 20; k += 2) {
8443 GemmMicrokernelTester()
8444 .mr(1)
8445 .nr(8)
8446 .kr(1)
8447 .sr(1)
8448 .m(1)
8449 .n(8)
8450 .k(k)
8451 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8452 }
8453 }
8454
8455 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
8456 TEST_REQUIRES_ARM_NEON_FMA;
8457 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008458 for (uint32_t n = 1; n <= 8; n++) {
8459 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008460 GemmMicrokernelTester()
8461 .mr(1)
8462 .nr(8)
8463 .kr(1)
8464 .sr(1)
8465 .m(m)
8466 .n(n)
8467 .k(k)
8468 .iterations(1)
8469 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8470 }
8471 }
8472 }
8473 }
8474
8475 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8) {
8476 TEST_REQUIRES_ARM_NEON_FMA;
8477 for (uint32_t n = 9; n < 16; n++) {
8478 for (size_t k = 1; k <= 10; k += 3) {
8479 GemmMicrokernelTester()
8480 .mr(1)
8481 .nr(8)
8482 .kr(1)
8483 .sr(1)
8484 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008485 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008486 .k(k)
8487 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8488 }
8489 }
8490 }
8491
8492 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
8493 TEST_REQUIRES_ARM_NEON_FMA;
8494 for (uint32_t n = 9; n < 16; n++) {
8495 for (size_t k = 1; k <= 10; k += 3) {
8496 GemmMicrokernelTester()
8497 .mr(1)
8498 .nr(8)
8499 .kr(1)
8500 .sr(1)
8501 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008502 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008503 .k(k)
8504 .cn_stride(11)
8505 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8506 }
8507 }
8508 }
8509
8510 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
8511 TEST_REQUIRES_ARM_NEON_FMA;
8512 for (uint32_t n = 9; n < 16; n++) {
8513 for (size_t k = 1; k <= 10; k += 3) {
8514 for (uint32_t m = 1; m <= 1; m++) {
8515 GemmMicrokernelTester()
8516 .mr(1)
8517 .nr(8)
8518 .kr(1)
8519 .sr(1)
8520 .m(m)
8521 .n(n)
8522 .k(k)
8523 .iterations(1)
8524 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8525 }
8526 }
8527 }
8528 }
8529
8530 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8) {
8531 TEST_REQUIRES_ARM_NEON_FMA;
8532 for (uint32_t n = 16; n <= 24; n += 8) {
8533 for (size_t k = 1; k <= 10; k += 3) {
8534 GemmMicrokernelTester()
8535 .mr(1)
8536 .nr(8)
8537 .kr(1)
8538 .sr(1)
8539 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008540 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008541 .k(k)
8542 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8543 }
8544 }
8545 }
8546
8547 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
8548 TEST_REQUIRES_ARM_NEON_FMA;
8549 for (uint32_t n = 16; n <= 24; n += 8) {
8550 for (size_t k = 1; k <= 10; k += 3) {
8551 GemmMicrokernelTester()
8552 .mr(1)
8553 .nr(8)
8554 .kr(1)
8555 .sr(1)
8556 .m(1)
8557 .n(n)
8558 .k(k)
8559 .cn_stride(11)
8560 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8561 }
8562 }
8563 }
8564
8565 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
8566 TEST_REQUIRES_ARM_NEON_FMA;
8567 for (uint32_t n = 16; n <= 24; n += 8) {
8568 for (size_t k = 1; k <= 10; k += 3) {
8569 for (uint32_t m = 1; m <= 1; m++) {
8570 GemmMicrokernelTester()
8571 .mr(1)
8572 .nr(8)
8573 .kr(1)
8574 .sr(1)
8575 .m(m)
8576 .n(n)
8577 .k(k)
8578 .iterations(1)
8579 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8580 }
8581 }
8582 }
8583 }
8584
8585 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, small_kernel) {
8586 TEST_REQUIRES_ARM_NEON_FMA;
8587 for (size_t k = 1; k <= 10; k += 3) {
8588 GemmMicrokernelTester()
8589 .mr(1)
8590 .nr(8)
8591 .kr(1)
8592 .sr(1)
8593 .m(1)
8594 .n(8)
8595 .k(k)
8596 .ks(3)
8597 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8598 }
8599 }
8600
8601 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, small_kernel_subtile) {
8602 TEST_REQUIRES_ARM_NEON_FMA;
8603 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008604 for (uint32_t n = 1; n <= 8; n++) {
8605 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008606 GemmMicrokernelTester()
8607 .mr(1)
8608 .nr(8)
8609 .kr(1)
8610 .sr(1)
8611 .m(m)
8612 .n(n)
8613 .k(k)
8614 .ks(3)
8615 .iterations(1)
8616 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8617 }
8618 }
8619 }
8620 }
8621
8622 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_gt_8_small_kernel) {
8623 TEST_REQUIRES_ARM_NEON_FMA;
8624 for (uint32_t n = 9; n < 16; n++) {
8625 for (size_t k = 1; k <= 10; k += 3) {
8626 GemmMicrokernelTester()
8627 .mr(1)
8628 .nr(8)
8629 .kr(1)
8630 .sr(1)
8631 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008632 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008633 .k(k)
8634 .ks(3)
8635 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8636 }
8637 }
8638 }
8639
8640 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, n_div_8_small_kernel) {
8641 TEST_REQUIRES_ARM_NEON_FMA;
8642 for (uint32_t n = 16; n <= 24; n += 8) {
8643 for (size_t k = 1; k <= 10; k += 3) {
8644 GemmMicrokernelTester()
8645 .mr(1)
8646 .nr(8)
8647 .kr(1)
8648 .sr(1)
8649 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008650 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008651 .k(k)
8652 .ks(3)
8653 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8654 }
8655 }
8656 }
8657
8658 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
8659 TEST_REQUIRES_ARM_NEON_FMA;
8660 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008661 for (uint32_t n = 1; n <= 8; n++) {
8662 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008663 GemmMicrokernelTester()
8664 .mr(1)
8665 .nr(8)
8666 .kr(1)
8667 .sr(1)
8668 .m(m)
8669 .n(n)
8670 .k(k)
8671 .cm_stride(11)
8672 .iterations(1)
8673 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8674 }
8675 }
8676 }
8677 }
8678
8679 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, a_offset) {
8680 TEST_REQUIRES_ARM_NEON_FMA;
8681 for (size_t k = 1; k <= 10; k += 3) {
8682 GemmMicrokernelTester()
8683 .mr(1)
8684 .nr(8)
8685 .kr(1)
8686 .sr(1)
8687 .m(1)
8688 .n(8)
8689 .k(k)
8690 .ks(3)
8691 .a_offset(13)
8692 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8693 }
8694 }
8695
8696 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, zero) {
8697 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008698 for (size_t k = 1; k <= 10; k += 3) {
8699 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008700 GemmMicrokernelTester()
8701 .mr(1)
8702 .nr(8)
8703 .kr(1)
8704 .sr(1)
8705 .m(1)
8706 .n(8)
8707 .k(k)
8708 .ks(3)
8709 .a_offset(13)
8710 .zero_index(mz)
8711 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8712 }
8713 }
8714 }
8715
8716 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, qmin) {
8717 TEST_REQUIRES_ARM_NEON_FMA;
8718 GemmMicrokernelTester()
8719 .mr(1)
8720 .nr(8)
8721 .kr(1)
8722 .sr(1)
8723 .m(1)
8724 .n(8)
8725 .k(2)
8726 .qmin(128)
8727 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8728 }
8729
8730 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, qmax) {
8731 TEST_REQUIRES_ARM_NEON_FMA;
8732 GemmMicrokernelTester()
8733 .mr(1)
8734 .nr(8)
8735 .kr(1)
8736 .sr(1)
8737 .m(1)
8738 .n(8)
8739 .k(2)
8740 .qmax(128)
8741 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8742 }
8743
8744 TEST(F32_IGEMM_MINMAX_1X8__NEONFMA_LANE_LD64, strided_cm) {
8745 TEST_REQUIRES_ARM_NEON_FMA;
8746 GemmMicrokernelTester()
8747 .mr(1)
8748 .nr(8)
8749 .kr(1)
8750 .sr(1)
8751 .m(1)
8752 .n(8)
8753 .k(2)
8754 .cm_stride(11)
8755 .Test(xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8756 }
8757#endif // XNN_ARCH_ARM64
8758
8759
8760#if XNN_ARCH_ARM64
8761 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_eq_2) {
8762 TEST_REQUIRES_ARM_NEON_FMA;
8763 GemmMicrokernelTester()
8764 .mr(4)
8765 .nr(2)
8766 .kr(1)
8767 .sr(1)
8768 .m(4)
8769 .n(2)
8770 .k(2)
8771 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8772 }
8773
8774 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, strided_cn) {
8775 TEST_REQUIRES_ARM_NEON_FMA;
8776 GemmMicrokernelTester()
8777 .mr(4)
8778 .nr(2)
8779 .kr(1)
8780 .sr(1)
8781 .m(4)
8782 .n(2)
8783 .k(2)
8784 .cn_stride(5)
8785 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8786 }
8787
8788 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_eq_2_subtile) {
8789 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008790 for (uint32_t n = 1; n <= 2; n++) {
8791 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008792 GemmMicrokernelTester()
8793 .mr(4)
8794 .nr(2)
8795 .kr(1)
8796 .sr(1)
8797 .m(m)
8798 .n(n)
8799 .k(2)
8800 .iterations(1)
8801 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8802 }
8803 }
8804 }
8805
8806 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
8807 TEST_REQUIRES_ARM_NEON_FMA;
8808 for (uint32_t m = 1; m <= 4; m++) {
8809 GemmMicrokernelTester()
8810 .mr(4)
8811 .nr(2)
8812 .kr(1)
8813 .sr(1)
8814 .m(m)
8815 .n(2)
8816 .k(2)
8817 .iterations(1)
8818 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8819 }
8820 }
8821
8822 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
8823 TEST_REQUIRES_ARM_NEON_FMA;
8824 for (uint32_t n = 1; n <= 2; n++) {
8825 GemmMicrokernelTester()
8826 .mr(4)
8827 .nr(2)
8828 .kr(1)
8829 .sr(1)
8830 .m(4)
8831 .n(n)
8832 .k(2)
8833 .iterations(1)
8834 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8835 }
8836 }
8837
8838 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_lt_2) {
8839 TEST_REQUIRES_ARM_NEON_FMA;
8840 for (size_t k = 1; k < 2; k++) {
8841 GemmMicrokernelTester()
8842 .mr(4)
8843 .nr(2)
8844 .kr(1)
8845 .sr(1)
8846 .m(4)
8847 .n(2)
8848 .k(k)
8849 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8850 }
8851 }
8852
8853 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_lt_2_subtile) {
8854 TEST_REQUIRES_ARM_NEON_FMA;
8855 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008856 for (uint32_t n = 1; n <= 2; n++) {
8857 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008858 GemmMicrokernelTester()
8859 .mr(4)
8860 .nr(2)
8861 .kr(1)
8862 .sr(1)
8863 .m(m)
8864 .n(n)
8865 .k(k)
8866 .iterations(1)
8867 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8868 }
8869 }
8870 }
8871 }
8872
8873 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_gt_2) {
8874 TEST_REQUIRES_ARM_NEON_FMA;
8875 for (size_t k = 3; k < 4; k++) {
8876 GemmMicrokernelTester()
8877 .mr(4)
8878 .nr(2)
8879 .kr(1)
8880 .sr(1)
8881 .m(4)
8882 .n(2)
8883 .k(k)
8884 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8885 }
8886 }
8887
8888 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_gt_2_subtile) {
8889 TEST_REQUIRES_ARM_NEON_FMA;
8890 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008891 for (uint32_t n = 1; n <= 2; n++) {
8892 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008893 GemmMicrokernelTester()
8894 .mr(4)
8895 .nr(2)
8896 .kr(1)
8897 .sr(1)
8898 .m(m)
8899 .n(n)
8900 .k(k)
8901 .iterations(1)
8902 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8903 }
8904 }
8905 }
8906 }
8907
8908 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_div_2) {
8909 TEST_REQUIRES_ARM_NEON_FMA;
8910 for (size_t k = 4; k <= 20; k += 2) {
8911 GemmMicrokernelTester()
8912 .mr(4)
8913 .nr(2)
8914 .kr(1)
8915 .sr(1)
8916 .m(4)
8917 .n(2)
8918 .k(k)
8919 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8920 }
8921 }
8922
8923 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, k_div_2_subtile) {
8924 TEST_REQUIRES_ARM_NEON_FMA;
8925 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008926 for (uint32_t n = 1; n <= 2; n++) {
8927 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008928 GemmMicrokernelTester()
8929 .mr(4)
8930 .nr(2)
8931 .kr(1)
8932 .sr(1)
8933 .m(m)
8934 .n(n)
8935 .k(k)
8936 .iterations(1)
8937 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8938 }
8939 }
8940 }
8941 }
8942
8943 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_gt_2) {
8944 TEST_REQUIRES_ARM_NEON_FMA;
8945 for (uint32_t n = 3; n < 4; n++) {
8946 for (size_t k = 1; k <= 10; k += 3) {
8947 GemmMicrokernelTester()
8948 .mr(4)
8949 .nr(2)
8950 .kr(1)
8951 .sr(1)
8952 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008953 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008954 .k(k)
8955 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8956 }
8957 }
8958 }
8959
8960 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_gt_2_strided_cn) {
8961 TEST_REQUIRES_ARM_NEON_FMA;
8962 for (uint32_t n = 3; n < 4; n++) {
8963 for (size_t k = 1; k <= 10; k += 3) {
8964 GemmMicrokernelTester()
8965 .mr(4)
8966 .nr(2)
8967 .kr(1)
8968 .sr(1)
8969 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008970 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008971 .k(k)
8972 .cn_stride(5)
8973 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8974 }
8975 }
8976 }
8977
8978 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_gt_2_subtile) {
8979 TEST_REQUIRES_ARM_NEON_FMA;
8980 for (uint32_t n = 3; n < 4; n++) {
8981 for (size_t k = 1; k <= 10; k += 3) {
8982 for (uint32_t m = 1; m <= 4; m++) {
8983 GemmMicrokernelTester()
8984 .mr(4)
8985 .nr(2)
8986 .kr(1)
8987 .sr(1)
8988 .m(m)
8989 .n(n)
8990 .k(k)
8991 .iterations(1)
8992 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
8993 }
8994 }
8995 }
8996 }
8997
8998 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_div_2) {
8999 TEST_REQUIRES_ARM_NEON_FMA;
9000 for (uint32_t n = 4; n <= 6; n += 2) {
9001 for (size_t k = 1; k <= 10; k += 3) {
9002 GemmMicrokernelTester()
9003 .mr(4)
9004 .nr(2)
9005 .kr(1)
9006 .sr(1)
9007 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009008 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009009 .k(k)
9010 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9011 }
9012 }
9013 }
9014
9015 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_div_2_strided_cn) {
9016 TEST_REQUIRES_ARM_NEON_FMA;
9017 for (uint32_t n = 4; n <= 6; n += 2) {
9018 for (size_t k = 1; k <= 10; k += 3) {
9019 GemmMicrokernelTester()
9020 .mr(4)
9021 .nr(2)
9022 .kr(1)
9023 .sr(1)
9024 .m(4)
9025 .n(n)
9026 .k(k)
9027 .cn_stride(5)
9028 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9029 }
9030 }
9031 }
9032
9033 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_div_2_subtile) {
9034 TEST_REQUIRES_ARM_NEON_FMA;
9035 for (uint32_t n = 4; n <= 6; n += 2) {
9036 for (size_t k = 1; k <= 10; k += 3) {
9037 for (uint32_t m = 1; m <= 4; m++) {
9038 GemmMicrokernelTester()
9039 .mr(4)
9040 .nr(2)
9041 .kr(1)
9042 .sr(1)
9043 .m(m)
9044 .n(n)
9045 .k(k)
9046 .iterations(1)
9047 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9048 }
9049 }
9050 }
9051 }
9052
9053 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, small_kernel) {
9054 TEST_REQUIRES_ARM_NEON_FMA;
9055 for (size_t k = 1; k <= 10; k += 3) {
9056 GemmMicrokernelTester()
9057 .mr(4)
9058 .nr(2)
9059 .kr(1)
9060 .sr(1)
9061 .m(4)
9062 .n(2)
9063 .k(k)
9064 .ks(3)
9065 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9066 }
9067 }
9068
9069 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, small_kernel_subtile) {
9070 TEST_REQUIRES_ARM_NEON_FMA;
9071 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009072 for (uint32_t n = 1; n <= 2; n++) {
9073 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009074 GemmMicrokernelTester()
9075 .mr(4)
9076 .nr(2)
9077 .kr(1)
9078 .sr(1)
9079 .m(m)
9080 .n(n)
9081 .k(k)
9082 .ks(3)
9083 .iterations(1)
9084 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9085 }
9086 }
9087 }
9088 }
9089
9090 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_gt_2_small_kernel) {
9091 TEST_REQUIRES_ARM_NEON_FMA;
9092 for (uint32_t n = 3; n < 4; n++) {
9093 for (size_t k = 1; k <= 10; k += 3) {
9094 GemmMicrokernelTester()
9095 .mr(4)
9096 .nr(2)
9097 .kr(1)
9098 .sr(1)
9099 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009100 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009101 .k(k)
9102 .ks(3)
9103 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9104 }
9105 }
9106 }
9107
9108 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, n_div_2_small_kernel) {
9109 TEST_REQUIRES_ARM_NEON_FMA;
9110 for (uint32_t n = 4; n <= 6; n += 2) {
9111 for (size_t k = 1; k <= 10; k += 3) {
9112 GemmMicrokernelTester()
9113 .mr(4)
9114 .nr(2)
9115 .kr(1)
9116 .sr(1)
9117 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009118 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009119 .k(k)
9120 .ks(3)
9121 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9122 }
9123 }
9124 }
9125
9126 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, strided_cm_subtile) {
9127 TEST_REQUIRES_ARM_NEON_FMA;
9128 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009129 for (uint32_t n = 1; n <= 2; n++) {
9130 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009131 GemmMicrokernelTester()
9132 .mr(4)
9133 .nr(2)
9134 .kr(1)
9135 .sr(1)
9136 .m(m)
9137 .n(n)
9138 .k(k)
9139 .cm_stride(5)
9140 .iterations(1)
9141 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9142 }
9143 }
9144 }
9145 }
9146
9147 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, a_offset) {
9148 TEST_REQUIRES_ARM_NEON_FMA;
9149 for (size_t k = 1; k <= 10; k += 3) {
9150 GemmMicrokernelTester()
9151 .mr(4)
9152 .nr(2)
9153 .kr(1)
9154 .sr(1)
9155 .m(4)
9156 .n(2)
9157 .k(k)
9158 .ks(3)
9159 .a_offset(43)
9160 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9161 }
9162 }
9163
9164 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, zero) {
9165 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009166 for (size_t k = 1; k <= 10; k += 3) {
9167 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009168 GemmMicrokernelTester()
9169 .mr(4)
9170 .nr(2)
9171 .kr(1)
9172 .sr(1)
9173 .m(4)
9174 .n(2)
9175 .k(k)
9176 .ks(3)
9177 .a_offset(43)
9178 .zero_index(mz)
9179 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9180 }
9181 }
9182 }
9183
9184 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, qmin) {
9185 TEST_REQUIRES_ARM_NEON_FMA;
9186 GemmMicrokernelTester()
9187 .mr(4)
9188 .nr(2)
9189 .kr(1)
9190 .sr(1)
9191 .m(4)
9192 .n(2)
9193 .k(2)
9194 .qmin(128)
9195 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9196 }
9197
9198 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, qmax) {
9199 TEST_REQUIRES_ARM_NEON_FMA;
9200 GemmMicrokernelTester()
9201 .mr(4)
9202 .nr(2)
9203 .kr(1)
9204 .sr(1)
9205 .m(4)
9206 .n(2)
9207 .k(2)
9208 .qmax(128)
9209 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9210 }
9211
9212 TEST(F32_IGEMM_MINMAX_4X2__NEONFMA_LANE_LD64, strided_cm) {
9213 TEST_REQUIRES_ARM_NEON_FMA;
9214 GemmMicrokernelTester()
9215 .mr(4)
9216 .nr(2)
9217 .kr(1)
9218 .sr(1)
9219 .m(4)
9220 .n(2)
9221 .k(2)
9222 .cm_stride(5)
9223 .Test(xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9224 }
9225#endif // XNN_ARCH_ARM64
9226
9227
9228#if XNN_ARCH_ARM64
9229 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4) {
9230 TEST_REQUIRES_ARM_NEON_FMA;
9231 GemmMicrokernelTester()
9232 .mr(4)
9233 .nr(8)
9234 .kr(1)
9235 .sr(1)
9236 .m(4)
9237 .n(8)
9238 .k(4)
9239 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9240 }
9241
9242 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cn) {
9243 TEST_REQUIRES_ARM_NEON_FMA;
9244 GemmMicrokernelTester()
9245 .mr(4)
9246 .nr(8)
9247 .kr(1)
9248 .sr(1)
9249 .m(4)
9250 .n(8)
9251 .k(4)
9252 .cn_stride(11)
9253 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9254 }
9255
9256 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile) {
9257 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009258 for (uint32_t n = 1; n <= 8; n++) {
9259 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009260 GemmMicrokernelTester()
9261 .mr(4)
9262 .nr(8)
9263 .kr(1)
9264 .sr(1)
9265 .m(m)
9266 .n(n)
9267 .k(4)
9268 .iterations(1)
9269 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9270 }
9271 }
9272 }
9273
9274 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile_m) {
9275 TEST_REQUIRES_ARM_NEON_FMA;
9276 for (uint32_t m = 1; m <= 4; m++) {
9277 GemmMicrokernelTester()
9278 .mr(4)
9279 .nr(8)
9280 .kr(1)
9281 .sr(1)
9282 .m(m)
9283 .n(8)
9284 .k(4)
9285 .iterations(1)
9286 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9287 }
9288 }
9289
9290 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_eq_4_subtile_n) {
9291 TEST_REQUIRES_ARM_NEON_FMA;
9292 for (uint32_t n = 1; n <= 8; n++) {
9293 GemmMicrokernelTester()
9294 .mr(4)
9295 .nr(8)
9296 .kr(1)
9297 .sr(1)
9298 .m(4)
9299 .n(n)
9300 .k(4)
9301 .iterations(1)
9302 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9303 }
9304 }
9305
9306 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4) {
9307 TEST_REQUIRES_ARM_NEON_FMA;
9308 for (size_t k = 1; k < 4; k++) {
9309 GemmMicrokernelTester()
9310 .mr(4)
9311 .nr(8)
9312 .kr(1)
9313 .sr(1)
9314 .m(4)
9315 .n(8)
9316 .k(k)
9317 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9318 }
9319 }
9320
9321 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_lt_4_subtile) {
9322 TEST_REQUIRES_ARM_NEON_FMA;
9323 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009324 for (uint32_t n = 1; n <= 8; n++) {
9325 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009326 GemmMicrokernelTester()
9327 .mr(4)
9328 .nr(8)
9329 .kr(1)
9330 .sr(1)
9331 .m(m)
9332 .n(n)
9333 .k(k)
9334 .iterations(1)
9335 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9336 }
9337 }
9338 }
9339 }
9340
9341 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4) {
9342 TEST_REQUIRES_ARM_NEON_FMA;
9343 for (size_t k = 5; k < 8; k++) {
9344 GemmMicrokernelTester()
9345 .mr(4)
9346 .nr(8)
9347 .kr(1)
9348 .sr(1)
9349 .m(4)
9350 .n(8)
9351 .k(k)
9352 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9353 }
9354 }
9355
9356 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_gt_4_subtile) {
9357 TEST_REQUIRES_ARM_NEON_FMA;
9358 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009359 for (uint32_t n = 1; n <= 8; n++) {
9360 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009361 GemmMicrokernelTester()
9362 .mr(4)
9363 .nr(8)
9364 .kr(1)
9365 .sr(1)
9366 .m(m)
9367 .n(n)
9368 .k(k)
9369 .iterations(1)
9370 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9371 }
9372 }
9373 }
9374 }
9375
9376 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4) {
9377 TEST_REQUIRES_ARM_NEON_FMA;
9378 for (size_t k = 8; k <= 40; k += 4) {
9379 GemmMicrokernelTester()
9380 .mr(4)
9381 .nr(8)
9382 .kr(1)
9383 .sr(1)
9384 .m(4)
9385 .n(8)
9386 .k(k)
9387 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9388 }
9389 }
9390
9391 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, k_div_4_subtile) {
9392 TEST_REQUIRES_ARM_NEON_FMA;
9393 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009394 for (uint32_t n = 1; n <= 8; n++) {
9395 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009396 GemmMicrokernelTester()
9397 .mr(4)
9398 .nr(8)
9399 .kr(1)
9400 .sr(1)
9401 .m(m)
9402 .n(n)
9403 .k(k)
9404 .iterations(1)
9405 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9406 }
9407 }
9408 }
9409 }
9410
9411 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8) {
9412 TEST_REQUIRES_ARM_NEON_FMA;
9413 for (uint32_t n = 9; n < 16; n++) {
9414 for (size_t k = 1; k <= 20; k += 5) {
9415 GemmMicrokernelTester()
9416 .mr(4)
9417 .nr(8)
9418 .kr(1)
9419 .sr(1)
9420 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009421 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009422 .k(k)
9423 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9424 }
9425 }
9426 }
9427
9428 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_strided_cn) {
9429 TEST_REQUIRES_ARM_NEON_FMA;
9430 for (uint32_t n = 9; n < 16; n++) {
9431 for (size_t k = 1; k <= 20; k += 5) {
9432 GemmMicrokernelTester()
9433 .mr(4)
9434 .nr(8)
9435 .kr(1)
9436 .sr(1)
9437 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009438 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009439 .k(k)
9440 .cn_stride(11)
9441 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9442 }
9443 }
9444 }
9445
9446 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_subtile) {
9447 TEST_REQUIRES_ARM_NEON_FMA;
9448 for (uint32_t n = 9; n < 16; n++) {
9449 for (size_t k = 1; k <= 20; k += 5) {
9450 for (uint32_t m = 1; m <= 4; m++) {
9451 GemmMicrokernelTester()
9452 .mr(4)
9453 .nr(8)
9454 .kr(1)
9455 .sr(1)
9456 .m(m)
9457 .n(n)
9458 .k(k)
9459 .iterations(1)
9460 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9461 }
9462 }
9463 }
9464 }
9465
9466 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8) {
9467 TEST_REQUIRES_ARM_NEON_FMA;
9468 for (uint32_t n = 16; n <= 24; n += 8) {
9469 for (size_t k = 1; k <= 20; k += 5) {
9470 GemmMicrokernelTester()
9471 .mr(4)
9472 .nr(8)
9473 .kr(1)
9474 .sr(1)
9475 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009476 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009477 .k(k)
9478 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9479 }
9480 }
9481 }
9482
9483 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_strided_cn) {
9484 TEST_REQUIRES_ARM_NEON_FMA;
9485 for (uint32_t n = 16; n <= 24; n += 8) {
9486 for (size_t k = 1; k <= 20; k += 5) {
9487 GemmMicrokernelTester()
9488 .mr(4)
9489 .nr(8)
9490 .kr(1)
9491 .sr(1)
9492 .m(4)
9493 .n(n)
9494 .k(k)
9495 .cn_stride(11)
9496 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9497 }
9498 }
9499 }
9500
9501 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_subtile) {
9502 TEST_REQUIRES_ARM_NEON_FMA;
9503 for (uint32_t n = 16; n <= 24; n += 8) {
9504 for (size_t k = 1; k <= 20; k += 5) {
9505 for (uint32_t m = 1; m <= 4; m++) {
9506 GemmMicrokernelTester()
9507 .mr(4)
9508 .nr(8)
9509 .kr(1)
9510 .sr(1)
9511 .m(m)
9512 .n(n)
9513 .k(k)
9514 .iterations(1)
9515 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9516 }
9517 }
9518 }
9519 }
9520
9521 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, small_kernel) {
9522 TEST_REQUIRES_ARM_NEON_FMA;
9523 for (size_t k = 1; k <= 20; k += 5) {
9524 GemmMicrokernelTester()
9525 .mr(4)
9526 .nr(8)
9527 .kr(1)
9528 .sr(1)
9529 .m(4)
9530 .n(8)
9531 .k(k)
9532 .ks(3)
9533 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9534 }
9535 }
9536
9537 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, small_kernel_subtile) {
9538 TEST_REQUIRES_ARM_NEON_FMA;
9539 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009540 for (uint32_t n = 1; n <= 8; n++) {
9541 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009542 GemmMicrokernelTester()
9543 .mr(4)
9544 .nr(8)
9545 .kr(1)
9546 .sr(1)
9547 .m(m)
9548 .n(n)
9549 .k(k)
9550 .ks(3)
9551 .iterations(1)
9552 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9553 }
9554 }
9555 }
9556 }
9557
9558 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_gt_8_small_kernel) {
9559 TEST_REQUIRES_ARM_NEON_FMA;
9560 for (uint32_t n = 9; n < 16; n++) {
9561 for (size_t k = 1; k <= 20; k += 5) {
9562 GemmMicrokernelTester()
9563 .mr(4)
9564 .nr(8)
9565 .kr(1)
9566 .sr(1)
9567 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009568 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009569 .k(k)
9570 .ks(3)
9571 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9572 }
9573 }
9574 }
9575
9576 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, n_div_8_small_kernel) {
9577 TEST_REQUIRES_ARM_NEON_FMA;
9578 for (uint32_t n = 16; n <= 24; n += 8) {
9579 for (size_t k = 1; k <= 20; k += 5) {
9580 GemmMicrokernelTester()
9581 .mr(4)
9582 .nr(8)
9583 .kr(1)
9584 .sr(1)
9585 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009586 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009587 .k(k)
9588 .ks(3)
9589 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9590 }
9591 }
9592 }
9593
9594 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cm_subtile) {
9595 TEST_REQUIRES_ARM_NEON_FMA;
9596 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009597 for (uint32_t n = 1; n <= 8; n++) {
9598 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009599 GemmMicrokernelTester()
9600 .mr(4)
9601 .nr(8)
9602 .kr(1)
9603 .sr(1)
9604 .m(m)
9605 .n(n)
9606 .k(k)
9607 .cm_stride(11)
9608 .iterations(1)
9609 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9610 }
9611 }
9612 }
9613 }
9614
9615 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, a_offset) {
9616 TEST_REQUIRES_ARM_NEON_FMA;
9617 for (size_t k = 1; k <= 20; k += 5) {
9618 GemmMicrokernelTester()
9619 .mr(4)
9620 .nr(8)
9621 .kr(1)
9622 .sr(1)
9623 .m(4)
9624 .n(8)
9625 .k(k)
9626 .ks(3)
9627 .a_offset(83)
9628 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9629 }
9630 }
9631
9632 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, zero) {
9633 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009634 for (size_t k = 1; k <= 20; k += 5) {
9635 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009636 GemmMicrokernelTester()
9637 .mr(4)
9638 .nr(8)
9639 .kr(1)
9640 .sr(1)
9641 .m(4)
9642 .n(8)
9643 .k(k)
9644 .ks(3)
9645 .a_offset(83)
9646 .zero_index(mz)
9647 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9648 }
9649 }
9650 }
9651
9652 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, qmin) {
9653 TEST_REQUIRES_ARM_NEON_FMA;
9654 GemmMicrokernelTester()
9655 .mr(4)
9656 .nr(8)
9657 .kr(1)
9658 .sr(1)
9659 .m(4)
9660 .n(8)
9661 .k(4)
9662 .qmin(128)
9663 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9664 }
9665
9666 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, qmax) {
9667 TEST_REQUIRES_ARM_NEON_FMA;
9668 GemmMicrokernelTester()
9669 .mr(4)
9670 .nr(8)
9671 .kr(1)
9672 .sr(1)
9673 .m(4)
9674 .n(8)
9675 .k(4)
9676 .qmax(128)
9677 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9678 }
9679
9680 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD128, strided_cm) {
9681 TEST_REQUIRES_ARM_NEON_FMA;
9682 GemmMicrokernelTester()
9683 .mr(4)
9684 .nr(8)
9685 .kr(1)
9686 .sr(1)
9687 .m(4)
9688 .n(8)
9689 .k(4)
9690 .cm_stride(11)
9691 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128, xnn_init_f32_minmax_scalar_params);
9692 }
9693#endif // XNN_ARCH_ARM64
9694
9695
9696#if XNN_ARCH_ARM64
9697 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2) {
9698 TEST_REQUIRES_ARM_NEON_FMA;
9699 GemmMicrokernelTester()
9700 .mr(4)
9701 .nr(8)
9702 .kr(1)
9703 .sr(1)
9704 .m(4)
9705 .n(8)
9706 .k(2)
9707 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9708 }
9709
9710 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cn) {
9711 TEST_REQUIRES_ARM_NEON_FMA;
9712 GemmMicrokernelTester()
9713 .mr(4)
9714 .nr(8)
9715 .kr(1)
9716 .sr(1)
9717 .m(4)
9718 .n(8)
9719 .k(2)
9720 .cn_stride(11)
9721 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9722 }
9723
9724 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
9725 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009726 for (uint32_t n = 1; n <= 8; n++) {
9727 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009728 GemmMicrokernelTester()
9729 .mr(4)
9730 .nr(8)
9731 .kr(1)
9732 .sr(1)
9733 .m(m)
9734 .n(n)
9735 .k(2)
9736 .iterations(1)
9737 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9738 }
9739 }
9740 }
9741
9742 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
9743 TEST_REQUIRES_ARM_NEON_FMA;
9744 for (uint32_t m = 1; m <= 4; m++) {
9745 GemmMicrokernelTester()
9746 .mr(4)
9747 .nr(8)
9748 .kr(1)
9749 .sr(1)
9750 .m(m)
9751 .n(8)
9752 .k(2)
9753 .iterations(1)
9754 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9755 }
9756 }
9757
9758 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
9759 TEST_REQUIRES_ARM_NEON_FMA;
9760 for (uint32_t n = 1; n <= 8; n++) {
9761 GemmMicrokernelTester()
9762 .mr(4)
9763 .nr(8)
9764 .kr(1)
9765 .sr(1)
9766 .m(4)
9767 .n(n)
9768 .k(2)
9769 .iterations(1)
9770 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9771 }
9772 }
9773
9774 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_lt_2) {
9775 TEST_REQUIRES_ARM_NEON_FMA;
9776 for (size_t k = 1; k < 2; k++) {
9777 GemmMicrokernelTester()
9778 .mr(4)
9779 .nr(8)
9780 .kr(1)
9781 .sr(1)
9782 .m(4)
9783 .n(8)
9784 .k(k)
9785 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9786 }
9787 }
9788
9789 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
9790 TEST_REQUIRES_ARM_NEON_FMA;
9791 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009792 for (uint32_t n = 1; n <= 8; n++) {
9793 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009794 GemmMicrokernelTester()
9795 .mr(4)
9796 .nr(8)
9797 .kr(1)
9798 .sr(1)
9799 .m(m)
9800 .n(n)
9801 .k(k)
9802 .iterations(1)
9803 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9804 }
9805 }
9806 }
9807 }
9808
9809 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_gt_2) {
9810 TEST_REQUIRES_ARM_NEON_FMA;
9811 for (size_t k = 3; k < 4; k++) {
9812 GemmMicrokernelTester()
9813 .mr(4)
9814 .nr(8)
9815 .kr(1)
9816 .sr(1)
9817 .m(4)
9818 .n(8)
9819 .k(k)
9820 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9821 }
9822 }
9823
9824 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
9825 TEST_REQUIRES_ARM_NEON_FMA;
9826 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009827 for (uint32_t n = 1; n <= 8; n++) {
9828 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009829 GemmMicrokernelTester()
9830 .mr(4)
9831 .nr(8)
9832 .kr(1)
9833 .sr(1)
9834 .m(m)
9835 .n(n)
9836 .k(k)
9837 .iterations(1)
9838 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9839 }
9840 }
9841 }
9842 }
9843
9844 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_div_2) {
9845 TEST_REQUIRES_ARM_NEON_FMA;
9846 for (size_t k = 4; k <= 20; k += 2) {
9847 GemmMicrokernelTester()
9848 .mr(4)
9849 .nr(8)
9850 .kr(1)
9851 .sr(1)
9852 .m(4)
9853 .n(8)
9854 .k(k)
9855 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9856 }
9857 }
9858
9859 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
9860 TEST_REQUIRES_ARM_NEON_FMA;
9861 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009862 for (uint32_t n = 1; n <= 8; n++) {
9863 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009864 GemmMicrokernelTester()
9865 .mr(4)
9866 .nr(8)
9867 .kr(1)
9868 .sr(1)
9869 .m(m)
9870 .n(n)
9871 .k(k)
9872 .iterations(1)
9873 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9874 }
9875 }
9876 }
9877 }
9878
9879 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8) {
9880 TEST_REQUIRES_ARM_NEON_FMA;
9881 for (uint32_t n = 9; n < 16; n++) {
9882 for (size_t k = 1; k <= 10; k += 3) {
9883 GemmMicrokernelTester()
9884 .mr(4)
9885 .nr(8)
9886 .kr(1)
9887 .sr(1)
9888 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009889 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009890 .k(k)
9891 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9892 }
9893 }
9894 }
9895
9896 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
9897 TEST_REQUIRES_ARM_NEON_FMA;
9898 for (uint32_t n = 9; n < 16; n++) {
9899 for (size_t k = 1; k <= 10; k += 3) {
9900 GemmMicrokernelTester()
9901 .mr(4)
9902 .nr(8)
9903 .kr(1)
9904 .sr(1)
9905 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009906 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009907 .k(k)
9908 .cn_stride(11)
9909 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9910 }
9911 }
9912 }
9913
9914 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
9915 TEST_REQUIRES_ARM_NEON_FMA;
9916 for (uint32_t n = 9; n < 16; n++) {
9917 for (size_t k = 1; k <= 10; k += 3) {
9918 for (uint32_t m = 1; m <= 4; m++) {
9919 GemmMicrokernelTester()
9920 .mr(4)
9921 .nr(8)
9922 .kr(1)
9923 .sr(1)
9924 .m(m)
9925 .n(n)
9926 .k(k)
9927 .iterations(1)
9928 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9929 }
9930 }
9931 }
9932 }
9933
9934 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8) {
9935 TEST_REQUIRES_ARM_NEON_FMA;
9936 for (uint32_t n = 16; n <= 24; n += 8) {
9937 for (size_t k = 1; k <= 10; k += 3) {
9938 GemmMicrokernelTester()
9939 .mr(4)
9940 .nr(8)
9941 .kr(1)
9942 .sr(1)
9943 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009944 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009945 .k(k)
9946 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9947 }
9948 }
9949 }
9950
9951 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
9952 TEST_REQUIRES_ARM_NEON_FMA;
9953 for (uint32_t n = 16; n <= 24; n += 8) {
9954 for (size_t k = 1; k <= 10; k += 3) {
9955 GemmMicrokernelTester()
9956 .mr(4)
9957 .nr(8)
9958 .kr(1)
9959 .sr(1)
9960 .m(4)
9961 .n(n)
9962 .k(k)
9963 .cn_stride(11)
9964 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9965 }
9966 }
9967 }
9968
9969 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
9970 TEST_REQUIRES_ARM_NEON_FMA;
9971 for (uint32_t n = 16; n <= 24; n += 8) {
9972 for (size_t k = 1; k <= 10; k += 3) {
9973 for (uint32_t m = 1; m <= 4; m++) {
9974 GemmMicrokernelTester()
9975 .mr(4)
9976 .nr(8)
9977 .kr(1)
9978 .sr(1)
9979 .m(m)
9980 .n(n)
9981 .k(k)
9982 .iterations(1)
9983 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
9984 }
9985 }
9986 }
9987 }
9988
9989 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, small_kernel) {
9990 TEST_REQUIRES_ARM_NEON_FMA;
9991 for (size_t k = 1; k <= 10; k += 3) {
9992 GemmMicrokernelTester()
9993 .mr(4)
9994 .nr(8)
9995 .kr(1)
9996 .sr(1)
9997 .m(4)
9998 .n(8)
9999 .k(k)
10000 .ks(3)
10001 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10002 }
10003 }
10004
10005 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, small_kernel_subtile) {
10006 TEST_REQUIRES_ARM_NEON_FMA;
10007 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010008 for (uint32_t n = 1; n <= 8; n++) {
10009 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010010 GemmMicrokernelTester()
10011 .mr(4)
10012 .nr(8)
10013 .kr(1)
10014 .sr(1)
10015 .m(m)
10016 .n(n)
10017 .k(k)
10018 .ks(3)
10019 .iterations(1)
10020 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10021 }
10022 }
10023 }
10024 }
10025
10026 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_gt_8_small_kernel) {
10027 TEST_REQUIRES_ARM_NEON_FMA;
10028 for (uint32_t n = 9; n < 16; n++) {
10029 for (size_t k = 1; k <= 10; k += 3) {
10030 GemmMicrokernelTester()
10031 .mr(4)
10032 .nr(8)
10033 .kr(1)
10034 .sr(1)
10035 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010036 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010037 .k(k)
10038 .ks(3)
10039 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10040 }
10041 }
10042 }
10043
10044 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, n_div_8_small_kernel) {
10045 TEST_REQUIRES_ARM_NEON_FMA;
10046 for (uint32_t n = 16; n <= 24; n += 8) {
10047 for (size_t k = 1; k <= 10; k += 3) {
10048 GemmMicrokernelTester()
10049 .mr(4)
10050 .nr(8)
10051 .kr(1)
10052 .sr(1)
10053 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010054 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010055 .k(k)
10056 .ks(3)
10057 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10058 }
10059 }
10060 }
10061
10062 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
10063 TEST_REQUIRES_ARM_NEON_FMA;
10064 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010065 for (uint32_t n = 1; n <= 8; n++) {
10066 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010067 GemmMicrokernelTester()
10068 .mr(4)
10069 .nr(8)
10070 .kr(1)
10071 .sr(1)
10072 .m(m)
10073 .n(n)
10074 .k(k)
10075 .cm_stride(11)
10076 .iterations(1)
10077 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10078 }
10079 }
10080 }
10081 }
10082
10083 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, a_offset) {
10084 TEST_REQUIRES_ARM_NEON_FMA;
10085 for (size_t k = 1; k <= 10; k += 3) {
10086 GemmMicrokernelTester()
10087 .mr(4)
10088 .nr(8)
10089 .kr(1)
10090 .sr(1)
10091 .m(4)
10092 .n(8)
10093 .k(k)
10094 .ks(3)
10095 .a_offset(43)
10096 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10097 }
10098 }
10099
10100 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, zero) {
10101 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010102 for (size_t k = 1; k <= 10; k += 3) {
10103 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010104 GemmMicrokernelTester()
10105 .mr(4)
10106 .nr(8)
10107 .kr(1)
10108 .sr(1)
10109 .m(4)
10110 .n(8)
10111 .k(k)
10112 .ks(3)
10113 .a_offset(43)
10114 .zero_index(mz)
10115 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10116 }
10117 }
10118 }
10119
10120 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, qmin) {
10121 TEST_REQUIRES_ARM_NEON_FMA;
10122 GemmMicrokernelTester()
10123 .mr(4)
10124 .nr(8)
10125 .kr(1)
10126 .sr(1)
10127 .m(4)
10128 .n(8)
10129 .k(2)
10130 .qmin(128)
10131 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10132 }
10133
10134 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, qmax) {
10135 TEST_REQUIRES_ARM_NEON_FMA;
10136 GemmMicrokernelTester()
10137 .mr(4)
10138 .nr(8)
10139 .kr(1)
10140 .sr(1)
10141 .m(4)
10142 .n(8)
10143 .k(2)
10144 .qmax(128)
10145 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10146 }
10147
10148 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_LANE_LD64, strided_cm) {
10149 TEST_REQUIRES_ARM_NEON_FMA;
10150 GemmMicrokernelTester()
10151 .mr(4)
10152 .nr(8)
10153 .kr(1)
10154 .sr(1)
10155 .m(4)
10156 .n(8)
10157 .k(2)
10158 .cm_stride(11)
10159 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10160 }
10161#endif // XNN_ARCH_ARM64
10162
10163
10164#if XNN_ARCH_ARM64
10165 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2) {
10166 TEST_REQUIRES_ARM_NEON_FMA;
10167 GemmMicrokernelTester()
10168 .mr(6)
10169 .nr(8)
10170 .kr(1)
10171 .sr(1)
10172 .m(6)
10173 .n(8)
10174 .k(2)
10175 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10176 }
10177
10178 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cn) {
10179 TEST_REQUIRES_ARM_NEON_FMA;
10180 GemmMicrokernelTester()
10181 .mr(6)
10182 .nr(8)
10183 .kr(1)
10184 .sr(1)
10185 .m(6)
10186 .n(8)
10187 .k(2)
10188 .cn_stride(11)
10189 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10190 }
10191
10192 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile) {
10193 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010194 for (uint32_t n = 1; n <= 8; n++) {
10195 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010196 GemmMicrokernelTester()
10197 .mr(6)
10198 .nr(8)
10199 .kr(1)
10200 .sr(1)
10201 .m(m)
10202 .n(n)
10203 .k(2)
10204 .iterations(1)
10205 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10206 }
10207 }
10208 }
10209
10210 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile_m) {
10211 TEST_REQUIRES_ARM_NEON_FMA;
10212 for (uint32_t m = 1; m <= 6; m++) {
10213 GemmMicrokernelTester()
10214 .mr(6)
10215 .nr(8)
10216 .kr(1)
10217 .sr(1)
10218 .m(m)
10219 .n(8)
10220 .k(2)
10221 .iterations(1)
10222 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10223 }
10224 }
10225
10226 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_eq_2_subtile_n) {
10227 TEST_REQUIRES_ARM_NEON_FMA;
10228 for (uint32_t n = 1; n <= 8; n++) {
10229 GemmMicrokernelTester()
10230 .mr(6)
10231 .nr(8)
10232 .kr(1)
10233 .sr(1)
10234 .m(6)
10235 .n(n)
10236 .k(2)
10237 .iterations(1)
10238 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10239 }
10240 }
10241
10242 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_lt_2) {
10243 TEST_REQUIRES_ARM_NEON_FMA;
10244 for (size_t k = 1; k < 2; k++) {
10245 GemmMicrokernelTester()
10246 .mr(6)
10247 .nr(8)
10248 .kr(1)
10249 .sr(1)
10250 .m(6)
10251 .n(8)
10252 .k(k)
10253 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10254 }
10255 }
10256
10257 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_lt_2_subtile) {
10258 TEST_REQUIRES_ARM_NEON_FMA;
10259 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010260 for (uint32_t n = 1; n <= 8; n++) {
10261 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010262 GemmMicrokernelTester()
10263 .mr(6)
10264 .nr(8)
10265 .kr(1)
10266 .sr(1)
10267 .m(m)
10268 .n(n)
10269 .k(k)
10270 .iterations(1)
10271 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10272 }
10273 }
10274 }
10275 }
10276
10277 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_gt_2) {
10278 TEST_REQUIRES_ARM_NEON_FMA;
10279 for (size_t k = 3; k < 4; k++) {
10280 GemmMicrokernelTester()
10281 .mr(6)
10282 .nr(8)
10283 .kr(1)
10284 .sr(1)
10285 .m(6)
10286 .n(8)
10287 .k(k)
10288 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10289 }
10290 }
10291
10292 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_gt_2_subtile) {
10293 TEST_REQUIRES_ARM_NEON_FMA;
10294 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010295 for (uint32_t n = 1; n <= 8; n++) {
10296 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010297 GemmMicrokernelTester()
10298 .mr(6)
10299 .nr(8)
10300 .kr(1)
10301 .sr(1)
10302 .m(m)
10303 .n(n)
10304 .k(k)
10305 .iterations(1)
10306 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10307 }
10308 }
10309 }
10310 }
10311
10312 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_div_2) {
10313 TEST_REQUIRES_ARM_NEON_FMA;
10314 for (size_t k = 4; k <= 20; k += 2) {
10315 GemmMicrokernelTester()
10316 .mr(6)
10317 .nr(8)
10318 .kr(1)
10319 .sr(1)
10320 .m(6)
10321 .n(8)
10322 .k(k)
10323 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10324 }
10325 }
10326
10327 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, k_div_2_subtile) {
10328 TEST_REQUIRES_ARM_NEON_FMA;
10329 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010330 for (uint32_t n = 1; n <= 8; n++) {
10331 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010332 GemmMicrokernelTester()
10333 .mr(6)
10334 .nr(8)
10335 .kr(1)
10336 .sr(1)
10337 .m(m)
10338 .n(n)
10339 .k(k)
10340 .iterations(1)
10341 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10342 }
10343 }
10344 }
10345 }
10346
10347 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8) {
10348 TEST_REQUIRES_ARM_NEON_FMA;
10349 for (uint32_t n = 9; n < 16; n++) {
10350 for (size_t k = 1; k <= 10; k += 3) {
10351 GemmMicrokernelTester()
10352 .mr(6)
10353 .nr(8)
10354 .kr(1)
10355 .sr(1)
10356 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010357 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010358 .k(k)
10359 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10360 }
10361 }
10362 }
10363
10364 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_strided_cn) {
10365 TEST_REQUIRES_ARM_NEON_FMA;
10366 for (uint32_t n = 9; n < 16; n++) {
10367 for (size_t k = 1; k <= 10; k += 3) {
10368 GemmMicrokernelTester()
10369 .mr(6)
10370 .nr(8)
10371 .kr(1)
10372 .sr(1)
10373 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010374 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010375 .k(k)
10376 .cn_stride(11)
10377 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10378 }
10379 }
10380 }
10381
10382 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_subtile) {
10383 TEST_REQUIRES_ARM_NEON_FMA;
10384 for (uint32_t n = 9; n < 16; n++) {
10385 for (size_t k = 1; k <= 10; k += 3) {
10386 for (uint32_t m = 1; m <= 6; m++) {
10387 GemmMicrokernelTester()
10388 .mr(6)
10389 .nr(8)
10390 .kr(1)
10391 .sr(1)
10392 .m(m)
10393 .n(n)
10394 .k(k)
10395 .iterations(1)
10396 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10397 }
10398 }
10399 }
10400 }
10401
10402 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8) {
10403 TEST_REQUIRES_ARM_NEON_FMA;
10404 for (uint32_t n = 16; n <= 24; n += 8) {
10405 for (size_t k = 1; k <= 10; k += 3) {
10406 GemmMicrokernelTester()
10407 .mr(6)
10408 .nr(8)
10409 .kr(1)
10410 .sr(1)
10411 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010412 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010413 .k(k)
10414 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10415 }
10416 }
10417 }
10418
10419 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_strided_cn) {
10420 TEST_REQUIRES_ARM_NEON_FMA;
10421 for (uint32_t n = 16; n <= 24; n += 8) {
10422 for (size_t k = 1; k <= 10; k += 3) {
10423 GemmMicrokernelTester()
10424 .mr(6)
10425 .nr(8)
10426 .kr(1)
10427 .sr(1)
10428 .m(6)
10429 .n(n)
10430 .k(k)
10431 .cn_stride(11)
10432 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10433 }
10434 }
10435 }
10436
10437 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_subtile) {
10438 TEST_REQUIRES_ARM_NEON_FMA;
10439 for (uint32_t n = 16; n <= 24; n += 8) {
10440 for (size_t k = 1; k <= 10; k += 3) {
10441 for (uint32_t m = 1; m <= 6; m++) {
10442 GemmMicrokernelTester()
10443 .mr(6)
10444 .nr(8)
10445 .kr(1)
10446 .sr(1)
10447 .m(m)
10448 .n(n)
10449 .k(k)
10450 .iterations(1)
10451 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10452 }
10453 }
10454 }
10455 }
10456
10457 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, small_kernel) {
10458 TEST_REQUIRES_ARM_NEON_FMA;
10459 for (size_t k = 1; k <= 10; k += 3) {
10460 GemmMicrokernelTester()
10461 .mr(6)
10462 .nr(8)
10463 .kr(1)
10464 .sr(1)
10465 .m(6)
10466 .n(8)
10467 .k(k)
10468 .ks(3)
10469 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10470 }
10471 }
10472
10473 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, small_kernel_subtile) {
10474 TEST_REQUIRES_ARM_NEON_FMA;
10475 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010476 for (uint32_t n = 1; n <= 8; n++) {
10477 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010478 GemmMicrokernelTester()
10479 .mr(6)
10480 .nr(8)
10481 .kr(1)
10482 .sr(1)
10483 .m(m)
10484 .n(n)
10485 .k(k)
10486 .ks(3)
10487 .iterations(1)
10488 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10489 }
10490 }
10491 }
10492 }
10493
10494 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_gt_8_small_kernel) {
10495 TEST_REQUIRES_ARM_NEON_FMA;
10496 for (uint32_t n = 9; n < 16; n++) {
10497 for (size_t k = 1; k <= 10; k += 3) {
10498 GemmMicrokernelTester()
10499 .mr(6)
10500 .nr(8)
10501 .kr(1)
10502 .sr(1)
10503 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010504 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010505 .k(k)
10506 .ks(3)
10507 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10508 }
10509 }
10510 }
10511
10512 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, n_div_8_small_kernel) {
10513 TEST_REQUIRES_ARM_NEON_FMA;
10514 for (uint32_t n = 16; n <= 24; n += 8) {
10515 for (size_t k = 1; k <= 10; k += 3) {
10516 GemmMicrokernelTester()
10517 .mr(6)
10518 .nr(8)
10519 .kr(1)
10520 .sr(1)
10521 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010522 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010523 .k(k)
10524 .ks(3)
10525 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10526 }
10527 }
10528 }
10529
10530 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cm_subtile) {
10531 TEST_REQUIRES_ARM_NEON_FMA;
10532 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010533 for (uint32_t n = 1; n <= 8; n++) {
10534 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010535 GemmMicrokernelTester()
10536 .mr(6)
10537 .nr(8)
10538 .kr(1)
10539 .sr(1)
10540 .m(m)
10541 .n(n)
10542 .k(k)
10543 .cm_stride(11)
10544 .iterations(1)
10545 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10546 }
10547 }
10548 }
10549 }
10550
10551 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, a_offset) {
10552 TEST_REQUIRES_ARM_NEON_FMA;
10553 for (size_t k = 1; k <= 10; k += 3) {
10554 GemmMicrokernelTester()
10555 .mr(6)
10556 .nr(8)
10557 .kr(1)
10558 .sr(1)
10559 .m(6)
10560 .n(8)
10561 .k(k)
10562 .ks(3)
10563 .a_offset(67)
10564 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10565 }
10566 }
10567
10568 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, zero) {
10569 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010570 for (size_t k = 1; k <= 10; k += 3) {
10571 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010572 GemmMicrokernelTester()
10573 .mr(6)
10574 .nr(8)
10575 .kr(1)
10576 .sr(1)
10577 .m(6)
10578 .n(8)
10579 .k(k)
10580 .ks(3)
10581 .a_offset(67)
10582 .zero_index(mz)
10583 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10584 }
10585 }
10586 }
10587
10588 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, qmin) {
10589 TEST_REQUIRES_ARM_NEON_FMA;
10590 GemmMicrokernelTester()
10591 .mr(6)
10592 .nr(8)
10593 .kr(1)
10594 .sr(1)
10595 .m(6)
10596 .n(8)
10597 .k(2)
10598 .qmin(128)
10599 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10600 }
10601
10602 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, qmax) {
10603 TEST_REQUIRES_ARM_NEON_FMA;
10604 GemmMicrokernelTester()
10605 .mr(6)
10606 .nr(8)
10607 .kr(1)
10608 .sr(1)
10609 .m(6)
10610 .n(8)
10611 .k(2)
10612 .qmax(128)
10613 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10614 }
10615
10616 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_LANE_LD64, strided_cm) {
10617 TEST_REQUIRES_ARM_NEON_FMA;
10618 GemmMicrokernelTester()
10619 .mr(6)
10620 .nr(8)
10621 .kr(1)
10622 .sr(1)
10623 .m(6)
10624 .n(8)
10625 .k(2)
10626 .cm_stride(11)
10627 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64, xnn_init_f32_minmax_scalar_params);
10628 }
10629#endif // XNN_ARCH_ARM64
10630
10631
10632#if XNN_ARCH_ARM || XNN_ARCH_ARM64
10633 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4) {
10634 TEST_REQUIRES_ARM_NEON;
10635 GemmMicrokernelTester()
10636 .mr(4)
10637 .nr(8)
10638 .kr(1)
10639 .sr(1)
10640 .m(4)
10641 .n(8)
10642 .k(4)
10643 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10644 }
10645
10646 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, strided_cn) {
10647 TEST_REQUIRES_ARM_NEON;
10648 GemmMicrokernelTester()
10649 .mr(4)
10650 .nr(8)
10651 .kr(1)
10652 .sr(1)
10653 .m(4)
10654 .n(8)
10655 .k(4)
10656 .cn_stride(11)
10657 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10658 }
10659
10660 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile) {
10661 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010662 for (uint32_t n = 1; n <= 8; n++) {
10663 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010664 GemmMicrokernelTester()
10665 .mr(4)
10666 .nr(8)
10667 .kr(1)
10668 .sr(1)
10669 .m(m)
10670 .n(n)
10671 .k(4)
10672 .iterations(1)
10673 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10674 }
10675 }
10676 }
10677
10678 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile_m) {
10679 TEST_REQUIRES_ARM_NEON;
10680 for (uint32_t m = 1; m <= 4; m++) {
10681 GemmMicrokernelTester()
10682 .mr(4)
10683 .nr(8)
10684 .kr(1)
10685 .sr(1)
10686 .m(m)
10687 .n(8)
10688 .k(4)
10689 .iterations(1)
10690 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10691 }
10692 }
10693
10694 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_eq_4_subtile_n) {
10695 TEST_REQUIRES_ARM_NEON;
10696 for (uint32_t n = 1; n <= 8; n++) {
10697 GemmMicrokernelTester()
10698 .mr(4)
10699 .nr(8)
10700 .kr(1)
10701 .sr(1)
10702 .m(4)
10703 .n(n)
10704 .k(4)
10705 .iterations(1)
10706 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10707 }
10708 }
10709
10710 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_lt_4) {
10711 TEST_REQUIRES_ARM_NEON;
10712 for (size_t k = 1; k < 4; k++) {
10713 GemmMicrokernelTester()
10714 .mr(4)
10715 .nr(8)
10716 .kr(1)
10717 .sr(1)
10718 .m(4)
10719 .n(8)
10720 .k(k)
10721 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10722 }
10723 }
10724
10725 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_lt_4_subtile) {
10726 TEST_REQUIRES_ARM_NEON;
10727 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010728 for (uint32_t n = 1; n <= 8; n++) {
10729 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010730 GemmMicrokernelTester()
10731 .mr(4)
10732 .nr(8)
10733 .kr(1)
10734 .sr(1)
10735 .m(m)
10736 .n(n)
10737 .k(k)
10738 .iterations(1)
10739 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10740 }
10741 }
10742 }
10743 }
10744
10745 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_gt_4) {
10746 TEST_REQUIRES_ARM_NEON;
10747 for (size_t k = 5; k < 8; k++) {
10748 GemmMicrokernelTester()
10749 .mr(4)
10750 .nr(8)
10751 .kr(1)
10752 .sr(1)
10753 .m(4)
10754 .n(8)
10755 .k(k)
10756 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10757 }
10758 }
10759
10760 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_gt_4_subtile) {
10761 TEST_REQUIRES_ARM_NEON;
10762 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010763 for (uint32_t n = 1; n <= 8; n++) {
10764 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010765 GemmMicrokernelTester()
10766 .mr(4)
10767 .nr(8)
10768 .kr(1)
10769 .sr(1)
10770 .m(m)
10771 .n(n)
10772 .k(k)
10773 .iterations(1)
10774 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10775 }
10776 }
10777 }
10778 }
10779
10780 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_div_4) {
10781 TEST_REQUIRES_ARM_NEON;
10782 for (size_t k = 8; k <= 40; k += 4) {
10783 GemmMicrokernelTester()
10784 .mr(4)
10785 .nr(8)
10786 .kr(1)
10787 .sr(1)
10788 .m(4)
10789 .n(8)
10790 .k(k)
10791 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10792 }
10793 }
10794
10795 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, k_div_4_subtile) {
10796 TEST_REQUIRES_ARM_NEON;
10797 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010798 for (uint32_t n = 1; n <= 8; n++) {
10799 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010800 GemmMicrokernelTester()
10801 .mr(4)
10802 .nr(8)
10803 .kr(1)
10804 .sr(1)
10805 .m(m)
10806 .n(n)
10807 .k(k)
10808 .iterations(1)
10809 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10810 }
10811 }
10812 }
10813 }
10814
10815 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8) {
10816 TEST_REQUIRES_ARM_NEON;
10817 for (uint32_t n = 9; n < 16; n++) {
10818 for (size_t k = 1; k <= 20; k += 5) {
10819 GemmMicrokernelTester()
10820 .mr(4)
10821 .nr(8)
10822 .kr(1)
10823 .sr(1)
10824 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010825 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010826 .k(k)
10827 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10828 }
10829 }
10830 }
10831
10832 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_strided_cn) {
10833 TEST_REQUIRES_ARM_NEON;
10834 for (uint32_t n = 9; n < 16; n++) {
10835 for (size_t k = 1; k <= 20; k += 5) {
10836 GemmMicrokernelTester()
10837 .mr(4)
10838 .nr(8)
10839 .kr(1)
10840 .sr(1)
10841 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010842 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010843 .k(k)
10844 .cn_stride(11)
10845 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10846 }
10847 }
10848 }
10849
10850 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_subtile) {
10851 TEST_REQUIRES_ARM_NEON;
10852 for (uint32_t n = 9; n < 16; n++) {
10853 for (size_t k = 1; k <= 20; k += 5) {
10854 for (uint32_t m = 1; m <= 4; m++) {
10855 GemmMicrokernelTester()
10856 .mr(4)
10857 .nr(8)
10858 .kr(1)
10859 .sr(1)
10860 .m(m)
10861 .n(n)
10862 .k(k)
10863 .iterations(1)
10864 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10865 }
10866 }
10867 }
10868 }
10869
10870 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8) {
10871 TEST_REQUIRES_ARM_NEON;
10872 for (uint32_t n = 16; n <= 24; n += 8) {
10873 for (size_t k = 1; k <= 20; k += 5) {
10874 GemmMicrokernelTester()
10875 .mr(4)
10876 .nr(8)
10877 .kr(1)
10878 .sr(1)
10879 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010880 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010881 .k(k)
10882 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10883 }
10884 }
10885 }
10886
10887 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8_strided_cn) {
10888 TEST_REQUIRES_ARM_NEON;
10889 for (uint32_t n = 16; n <= 24; n += 8) {
10890 for (size_t k = 1; k <= 20; k += 5) {
10891 GemmMicrokernelTester()
10892 .mr(4)
10893 .nr(8)
10894 .kr(1)
10895 .sr(1)
10896 .m(4)
10897 .n(n)
10898 .k(k)
10899 .cn_stride(11)
10900 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10901 }
10902 }
10903 }
10904
10905 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8_subtile) {
10906 TEST_REQUIRES_ARM_NEON;
10907 for (uint32_t n = 16; n <= 24; n += 8) {
10908 for (size_t k = 1; k <= 20; k += 5) {
10909 for (uint32_t m = 1; m <= 4; m++) {
10910 GemmMicrokernelTester()
10911 .mr(4)
10912 .nr(8)
10913 .kr(1)
10914 .sr(1)
10915 .m(m)
10916 .n(n)
10917 .k(k)
10918 .iterations(1)
10919 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10920 }
10921 }
10922 }
10923 }
10924
10925 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, small_kernel) {
10926 TEST_REQUIRES_ARM_NEON;
10927 for (size_t k = 1; k <= 20; k += 5) {
10928 GemmMicrokernelTester()
10929 .mr(4)
10930 .nr(8)
10931 .kr(1)
10932 .sr(1)
10933 .m(4)
10934 .n(8)
10935 .k(k)
10936 .ks(3)
10937 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10938 }
10939 }
10940
10941 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, small_kernel_subtile) {
10942 TEST_REQUIRES_ARM_NEON;
10943 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010944 for (uint32_t n = 1; n <= 8; n++) {
10945 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010946 GemmMicrokernelTester()
10947 .mr(4)
10948 .nr(8)
10949 .kr(1)
10950 .sr(1)
10951 .m(m)
10952 .n(n)
10953 .k(k)
10954 .ks(3)
10955 .iterations(1)
10956 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10957 }
10958 }
10959 }
10960 }
10961
10962 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_gt_8_small_kernel) {
10963 TEST_REQUIRES_ARM_NEON;
10964 for (uint32_t n = 9; n < 16; n++) {
10965 for (size_t k = 1; k <= 20; k += 5) {
10966 GemmMicrokernelTester()
10967 .mr(4)
10968 .nr(8)
10969 .kr(1)
10970 .sr(1)
10971 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010972 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010973 .k(k)
10974 .ks(3)
10975 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10976 }
10977 }
10978 }
10979
10980 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, n_div_8_small_kernel) {
10981 TEST_REQUIRES_ARM_NEON;
10982 for (uint32_t n = 16; n <= 24; n += 8) {
10983 for (size_t k = 1; k <= 20; k += 5) {
10984 GemmMicrokernelTester()
10985 .mr(4)
10986 .nr(8)
10987 .kr(1)
10988 .sr(1)
10989 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010990 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010991 .k(k)
10992 .ks(3)
10993 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
10994 }
10995 }
10996 }
10997
10998 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, strided_cm_subtile) {
10999 TEST_REQUIRES_ARM_NEON;
11000 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011001 for (uint32_t n = 1; n <= 8; n++) {
11002 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011003 GemmMicrokernelTester()
11004 .mr(4)
11005 .nr(8)
11006 .kr(1)
11007 .sr(1)
11008 .m(m)
11009 .n(n)
11010 .k(k)
11011 .cm_stride(11)
11012 .iterations(1)
11013 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
11014 }
11015 }
11016 }
11017 }
11018
11019 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, a_offset) {
11020 TEST_REQUIRES_ARM_NEON;
11021 for (size_t k = 1; k <= 20; k += 5) {
11022 GemmMicrokernelTester()
11023 .mr(4)
11024 .nr(8)
11025 .kr(1)
11026 .sr(1)
11027 .m(4)
11028 .n(8)
11029 .k(k)
11030 .ks(3)
11031 .a_offset(83)
11032 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
11033 }
11034 }
11035
11036 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, zero) {
11037 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011038 for (size_t k = 1; k <= 20; k += 5) {
11039 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011040 GemmMicrokernelTester()
11041 .mr(4)
11042 .nr(8)
11043 .kr(1)
11044 .sr(1)
11045 .m(4)
11046 .n(8)
11047 .k(k)
11048 .ks(3)
11049 .a_offset(83)
11050 .zero_index(mz)
11051 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
11052 }
11053 }
11054 }
11055
11056 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, qmin) {
11057 TEST_REQUIRES_ARM_NEON;
11058 GemmMicrokernelTester()
11059 .mr(4)
11060 .nr(8)
11061 .kr(1)
11062 .sr(1)
11063 .m(4)
11064 .n(8)
11065 .k(4)
11066 .qmin(128)
11067 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
11068 }
11069
11070 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, qmax) {
11071 TEST_REQUIRES_ARM_NEON;
11072 GemmMicrokernelTester()
11073 .mr(4)
11074 .nr(8)
11075 .kr(1)
11076 .sr(1)
11077 .m(4)
11078 .n(8)
11079 .k(4)
11080 .qmax(128)
11081 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
11082 }
11083
11084 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD128, strided_cm) {
11085 TEST_REQUIRES_ARM_NEON;
11086 GemmMicrokernelTester()
11087 .mr(4)
11088 .nr(8)
11089 .kr(1)
11090 .sr(1)
11091 .m(4)
11092 .n(8)
11093 .k(4)
11094 .cm_stride(11)
11095 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128, xnn_init_f32_minmax_scalar_params);
11096 }
11097#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11098
11099
11100#if XNN_ARCH_ARM || XNN_ARCH_ARM64
11101 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_eq_2) {
11102 TEST_REQUIRES_ARM_NEON;
11103 GemmMicrokernelTester()
11104 .mr(4)
11105 .nr(8)
11106 .kr(1)
11107 .sr(1)
11108 .m(4)
11109 .n(8)
11110 .k(2)
11111 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11112 }
11113
11114 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, strided_cn) {
11115 TEST_REQUIRES_ARM_NEON;
11116 GemmMicrokernelTester()
11117 .mr(4)
11118 .nr(8)
11119 .kr(1)
11120 .sr(1)
11121 .m(4)
11122 .n(8)
11123 .k(2)
11124 .cn_stride(11)
11125 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11126 }
11127
11128 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile) {
11129 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011130 for (uint32_t n = 1; n <= 8; n++) {
11131 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011132 GemmMicrokernelTester()
11133 .mr(4)
11134 .nr(8)
11135 .kr(1)
11136 .sr(1)
11137 .m(m)
11138 .n(n)
11139 .k(2)
11140 .iterations(1)
11141 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11142 }
11143 }
11144 }
11145
11146 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile_m) {
11147 TEST_REQUIRES_ARM_NEON;
11148 for (uint32_t m = 1; m <= 4; m++) {
11149 GemmMicrokernelTester()
11150 .mr(4)
11151 .nr(8)
11152 .kr(1)
11153 .sr(1)
11154 .m(m)
11155 .n(8)
11156 .k(2)
11157 .iterations(1)
11158 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11159 }
11160 }
11161
11162 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_eq_2_subtile_n) {
11163 TEST_REQUIRES_ARM_NEON;
11164 for (uint32_t n = 1; n <= 8; n++) {
11165 GemmMicrokernelTester()
11166 .mr(4)
11167 .nr(8)
11168 .kr(1)
11169 .sr(1)
11170 .m(4)
11171 .n(n)
11172 .k(2)
11173 .iterations(1)
11174 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11175 }
11176 }
11177
11178 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_lt_2) {
11179 TEST_REQUIRES_ARM_NEON;
11180 for (size_t k = 1; k < 2; k++) {
11181 GemmMicrokernelTester()
11182 .mr(4)
11183 .nr(8)
11184 .kr(1)
11185 .sr(1)
11186 .m(4)
11187 .n(8)
11188 .k(k)
11189 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11190 }
11191 }
11192
11193 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_lt_2_subtile) {
11194 TEST_REQUIRES_ARM_NEON;
11195 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011196 for (uint32_t n = 1; n <= 8; n++) {
11197 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011198 GemmMicrokernelTester()
11199 .mr(4)
11200 .nr(8)
11201 .kr(1)
11202 .sr(1)
11203 .m(m)
11204 .n(n)
11205 .k(k)
11206 .iterations(1)
11207 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11208 }
11209 }
11210 }
11211 }
11212
11213 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_gt_2) {
11214 TEST_REQUIRES_ARM_NEON;
11215 for (size_t k = 3; k < 4; k++) {
11216 GemmMicrokernelTester()
11217 .mr(4)
11218 .nr(8)
11219 .kr(1)
11220 .sr(1)
11221 .m(4)
11222 .n(8)
11223 .k(k)
11224 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11225 }
11226 }
11227
11228 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_gt_2_subtile) {
11229 TEST_REQUIRES_ARM_NEON;
11230 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011231 for (uint32_t n = 1; n <= 8; n++) {
11232 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011233 GemmMicrokernelTester()
11234 .mr(4)
11235 .nr(8)
11236 .kr(1)
11237 .sr(1)
11238 .m(m)
11239 .n(n)
11240 .k(k)
11241 .iterations(1)
11242 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11243 }
11244 }
11245 }
11246 }
11247
11248 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_div_2) {
11249 TEST_REQUIRES_ARM_NEON;
11250 for (size_t k = 4; k <= 20; k += 2) {
11251 GemmMicrokernelTester()
11252 .mr(4)
11253 .nr(8)
11254 .kr(1)
11255 .sr(1)
11256 .m(4)
11257 .n(8)
11258 .k(k)
11259 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11260 }
11261 }
11262
11263 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, k_div_2_subtile) {
11264 TEST_REQUIRES_ARM_NEON;
11265 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011266 for (uint32_t n = 1; n <= 8; n++) {
11267 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011268 GemmMicrokernelTester()
11269 .mr(4)
11270 .nr(8)
11271 .kr(1)
11272 .sr(1)
11273 .m(m)
11274 .n(n)
11275 .k(k)
11276 .iterations(1)
11277 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11278 }
11279 }
11280 }
11281 }
11282
11283 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_gt_8) {
11284 TEST_REQUIRES_ARM_NEON;
11285 for (uint32_t n = 9; n < 16; n++) {
11286 for (size_t k = 1; k <= 10; k += 3) {
11287 GemmMicrokernelTester()
11288 .mr(4)
11289 .nr(8)
11290 .kr(1)
11291 .sr(1)
11292 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011293 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011294 .k(k)
11295 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11296 }
11297 }
11298 }
11299
11300 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_strided_cn) {
11301 TEST_REQUIRES_ARM_NEON;
11302 for (uint32_t n = 9; n < 16; n++) {
11303 for (size_t k = 1; k <= 10; k += 3) {
11304 GemmMicrokernelTester()
11305 .mr(4)
11306 .nr(8)
11307 .kr(1)
11308 .sr(1)
11309 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011310 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011311 .k(k)
11312 .cn_stride(11)
11313 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11314 }
11315 }
11316 }
11317
11318 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_subtile) {
11319 TEST_REQUIRES_ARM_NEON;
11320 for (uint32_t n = 9; n < 16; n++) {
11321 for (size_t k = 1; k <= 10; k += 3) {
11322 for (uint32_t m = 1; m <= 4; m++) {
11323 GemmMicrokernelTester()
11324 .mr(4)
11325 .nr(8)
11326 .kr(1)
11327 .sr(1)
11328 .m(m)
11329 .n(n)
11330 .k(k)
11331 .iterations(1)
11332 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11333 }
11334 }
11335 }
11336 }
11337
11338 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_div_8) {
11339 TEST_REQUIRES_ARM_NEON;
11340 for (uint32_t n = 16; n <= 24; n += 8) {
11341 for (size_t k = 1; k <= 10; k += 3) {
11342 GemmMicrokernelTester()
11343 .mr(4)
11344 .nr(8)
11345 .kr(1)
11346 .sr(1)
11347 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011348 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011349 .k(k)
11350 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11351 }
11352 }
11353 }
11354
11355 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_div_8_strided_cn) {
11356 TEST_REQUIRES_ARM_NEON;
11357 for (uint32_t n = 16; n <= 24; n += 8) {
11358 for (size_t k = 1; k <= 10; k += 3) {
11359 GemmMicrokernelTester()
11360 .mr(4)
11361 .nr(8)
11362 .kr(1)
11363 .sr(1)
11364 .m(4)
11365 .n(n)
11366 .k(k)
11367 .cn_stride(11)
11368 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11369 }
11370 }
11371 }
11372
11373 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_div_8_subtile) {
11374 TEST_REQUIRES_ARM_NEON;
11375 for (uint32_t n = 16; n <= 24; n += 8) {
11376 for (size_t k = 1; k <= 10; k += 3) {
11377 for (uint32_t m = 1; m <= 4; m++) {
11378 GemmMicrokernelTester()
11379 .mr(4)
11380 .nr(8)
11381 .kr(1)
11382 .sr(1)
11383 .m(m)
11384 .n(n)
11385 .k(k)
11386 .iterations(1)
11387 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11388 }
11389 }
11390 }
11391 }
11392
11393 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, small_kernel) {
11394 TEST_REQUIRES_ARM_NEON;
11395 for (size_t k = 1; k <= 10; k += 3) {
11396 GemmMicrokernelTester()
11397 .mr(4)
11398 .nr(8)
11399 .kr(1)
11400 .sr(1)
11401 .m(4)
11402 .n(8)
11403 .k(k)
11404 .ks(3)
11405 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11406 }
11407 }
11408
11409 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, small_kernel_subtile) {
11410 TEST_REQUIRES_ARM_NEON;
11411 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011412 for (uint32_t n = 1; n <= 8; n++) {
11413 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011414 GemmMicrokernelTester()
11415 .mr(4)
11416 .nr(8)
11417 .kr(1)
11418 .sr(1)
11419 .m(m)
11420 .n(n)
11421 .k(k)
11422 .ks(3)
11423 .iterations(1)
11424 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11425 }
11426 }
11427 }
11428 }
11429
11430 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_gt_8_small_kernel) {
11431 TEST_REQUIRES_ARM_NEON;
11432 for (uint32_t n = 9; n < 16; n++) {
11433 for (size_t k = 1; k <= 10; k += 3) {
11434 GemmMicrokernelTester()
11435 .mr(4)
11436 .nr(8)
11437 .kr(1)
11438 .sr(1)
11439 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011440 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011441 .k(k)
11442 .ks(3)
11443 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11444 }
11445 }
11446 }
11447
11448 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, n_div_8_small_kernel) {
11449 TEST_REQUIRES_ARM_NEON;
11450 for (uint32_t n = 16; n <= 24; n += 8) {
11451 for (size_t k = 1; k <= 10; k += 3) {
11452 GemmMicrokernelTester()
11453 .mr(4)
11454 .nr(8)
11455 .kr(1)
11456 .sr(1)
11457 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011458 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011459 .k(k)
11460 .ks(3)
11461 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11462 }
11463 }
11464 }
11465
11466 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, strided_cm_subtile) {
11467 TEST_REQUIRES_ARM_NEON;
11468 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011469 for (uint32_t n = 1; n <= 8; n++) {
11470 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011471 GemmMicrokernelTester()
11472 .mr(4)
11473 .nr(8)
11474 .kr(1)
11475 .sr(1)
11476 .m(m)
11477 .n(n)
11478 .k(k)
11479 .cm_stride(11)
11480 .iterations(1)
11481 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11482 }
11483 }
11484 }
11485 }
11486
11487 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, a_offset) {
11488 TEST_REQUIRES_ARM_NEON;
11489 for (size_t k = 1; k <= 10; k += 3) {
11490 GemmMicrokernelTester()
11491 .mr(4)
11492 .nr(8)
11493 .kr(1)
11494 .sr(1)
11495 .m(4)
11496 .n(8)
11497 .k(k)
11498 .ks(3)
11499 .a_offset(43)
11500 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11501 }
11502 }
11503
11504 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, zero) {
11505 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011506 for (size_t k = 1; k <= 10; k += 3) {
11507 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011508 GemmMicrokernelTester()
11509 .mr(4)
11510 .nr(8)
11511 .kr(1)
11512 .sr(1)
11513 .m(4)
11514 .n(8)
11515 .k(k)
11516 .ks(3)
11517 .a_offset(43)
11518 .zero_index(mz)
11519 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11520 }
11521 }
11522 }
11523
11524 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, qmin) {
11525 TEST_REQUIRES_ARM_NEON;
11526 GemmMicrokernelTester()
11527 .mr(4)
11528 .nr(8)
11529 .kr(1)
11530 .sr(1)
11531 .m(4)
11532 .n(8)
11533 .k(2)
11534 .qmin(128)
11535 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11536 }
11537
11538 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, qmax) {
11539 TEST_REQUIRES_ARM_NEON;
11540 GemmMicrokernelTester()
11541 .mr(4)
11542 .nr(8)
11543 .kr(1)
11544 .sr(1)
11545 .m(4)
11546 .n(8)
11547 .k(2)
11548 .qmax(128)
11549 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11550 }
11551
11552 TEST(F32_IGEMM_MINMAX_4X8__NEON_DUP_LD64, strided_cm) {
11553 TEST_REQUIRES_ARM_NEON;
11554 GemmMicrokernelTester()
11555 .mr(4)
11556 .nr(8)
11557 .kr(1)
11558 .sr(1)
11559 .m(4)
11560 .n(8)
11561 .k(2)
11562 .cm_stride(11)
11563 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64, xnn_init_f32_minmax_scalar_params);
11564 }
11565#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11566
11567
11568#if XNN_ARCH_ARM || XNN_ARCH_ARM64
11569 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4) {
11570 TEST_REQUIRES_ARM_NEON_FMA;
11571 GemmMicrokernelTester()
11572 .mr(4)
11573 .nr(8)
11574 .kr(1)
11575 .sr(1)
11576 .m(4)
11577 .n(8)
11578 .k(4)
11579 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11580 }
11581
11582 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cn) {
11583 TEST_REQUIRES_ARM_NEON_FMA;
11584 GemmMicrokernelTester()
11585 .mr(4)
11586 .nr(8)
11587 .kr(1)
11588 .sr(1)
11589 .m(4)
11590 .n(8)
11591 .k(4)
11592 .cn_stride(11)
11593 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11594 }
11595
11596 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile) {
11597 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011598 for (uint32_t n = 1; n <= 8; n++) {
11599 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011600 GemmMicrokernelTester()
11601 .mr(4)
11602 .nr(8)
11603 .kr(1)
11604 .sr(1)
11605 .m(m)
11606 .n(n)
11607 .k(4)
11608 .iterations(1)
11609 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11610 }
11611 }
11612 }
11613
11614 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile_m) {
11615 TEST_REQUIRES_ARM_NEON_FMA;
11616 for (uint32_t m = 1; m <= 4; m++) {
11617 GemmMicrokernelTester()
11618 .mr(4)
11619 .nr(8)
11620 .kr(1)
11621 .sr(1)
11622 .m(m)
11623 .n(8)
11624 .k(4)
11625 .iterations(1)
11626 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11627 }
11628 }
11629
11630 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_eq_4_subtile_n) {
11631 TEST_REQUIRES_ARM_NEON_FMA;
11632 for (uint32_t n = 1; n <= 8; n++) {
11633 GemmMicrokernelTester()
11634 .mr(4)
11635 .nr(8)
11636 .kr(1)
11637 .sr(1)
11638 .m(4)
11639 .n(n)
11640 .k(4)
11641 .iterations(1)
11642 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11643 }
11644 }
11645
11646 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_lt_4) {
11647 TEST_REQUIRES_ARM_NEON_FMA;
11648 for (size_t k = 1; k < 4; k++) {
11649 GemmMicrokernelTester()
11650 .mr(4)
11651 .nr(8)
11652 .kr(1)
11653 .sr(1)
11654 .m(4)
11655 .n(8)
11656 .k(k)
11657 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11658 }
11659 }
11660
11661 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_lt_4_subtile) {
11662 TEST_REQUIRES_ARM_NEON_FMA;
11663 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011664 for (uint32_t n = 1; n <= 8; n++) {
11665 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011666 GemmMicrokernelTester()
11667 .mr(4)
11668 .nr(8)
11669 .kr(1)
11670 .sr(1)
11671 .m(m)
11672 .n(n)
11673 .k(k)
11674 .iterations(1)
11675 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11676 }
11677 }
11678 }
11679 }
11680
11681 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_gt_4) {
11682 TEST_REQUIRES_ARM_NEON_FMA;
11683 for (size_t k = 5; k < 8; k++) {
11684 GemmMicrokernelTester()
11685 .mr(4)
11686 .nr(8)
11687 .kr(1)
11688 .sr(1)
11689 .m(4)
11690 .n(8)
11691 .k(k)
11692 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11693 }
11694 }
11695
11696 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_gt_4_subtile) {
11697 TEST_REQUIRES_ARM_NEON_FMA;
11698 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011699 for (uint32_t n = 1; n <= 8; n++) {
11700 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011701 GemmMicrokernelTester()
11702 .mr(4)
11703 .nr(8)
11704 .kr(1)
11705 .sr(1)
11706 .m(m)
11707 .n(n)
11708 .k(k)
11709 .iterations(1)
11710 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11711 }
11712 }
11713 }
11714 }
11715
11716 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_div_4) {
11717 TEST_REQUIRES_ARM_NEON_FMA;
11718 for (size_t k = 8; k <= 40; k += 4) {
11719 GemmMicrokernelTester()
11720 .mr(4)
11721 .nr(8)
11722 .kr(1)
11723 .sr(1)
11724 .m(4)
11725 .n(8)
11726 .k(k)
11727 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11728 }
11729 }
11730
11731 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, k_div_4_subtile) {
11732 TEST_REQUIRES_ARM_NEON_FMA;
11733 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011734 for (uint32_t n = 1; n <= 8; n++) {
11735 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011736 GemmMicrokernelTester()
11737 .mr(4)
11738 .nr(8)
11739 .kr(1)
11740 .sr(1)
11741 .m(m)
11742 .n(n)
11743 .k(k)
11744 .iterations(1)
11745 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11746 }
11747 }
11748 }
11749 }
11750
11751 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8) {
11752 TEST_REQUIRES_ARM_NEON_FMA;
11753 for (uint32_t n = 9; n < 16; n++) {
11754 for (size_t k = 1; k <= 20; k += 5) {
11755 GemmMicrokernelTester()
11756 .mr(4)
11757 .nr(8)
11758 .kr(1)
11759 .sr(1)
11760 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011761 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011762 .k(k)
11763 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11764 }
11765 }
11766 }
11767
11768 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_strided_cn) {
11769 TEST_REQUIRES_ARM_NEON_FMA;
11770 for (uint32_t n = 9; n < 16; n++) {
11771 for (size_t k = 1; k <= 20; k += 5) {
11772 GemmMicrokernelTester()
11773 .mr(4)
11774 .nr(8)
11775 .kr(1)
11776 .sr(1)
11777 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011778 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011779 .k(k)
11780 .cn_stride(11)
11781 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11782 }
11783 }
11784 }
11785
11786 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_subtile) {
11787 TEST_REQUIRES_ARM_NEON_FMA;
11788 for (uint32_t n = 9; n < 16; n++) {
11789 for (size_t k = 1; k <= 20; k += 5) {
11790 for (uint32_t m = 1; m <= 4; m++) {
11791 GemmMicrokernelTester()
11792 .mr(4)
11793 .nr(8)
11794 .kr(1)
11795 .sr(1)
11796 .m(m)
11797 .n(n)
11798 .k(k)
11799 .iterations(1)
11800 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11801 }
11802 }
11803 }
11804 }
11805
11806 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8) {
11807 TEST_REQUIRES_ARM_NEON_FMA;
11808 for (uint32_t n = 16; n <= 24; n += 8) {
11809 for (size_t k = 1; k <= 20; k += 5) {
11810 GemmMicrokernelTester()
11811 .mr(4)
11812 .nr(8)
11813 .kr(1)
11814 .sr(1)
11815 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011816 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011817 .k(k)
11818 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11819 }
11820 }
11821 }
11822
11823 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_strided_cn) {
11824 TEST_REQUIRES_ARM_NEON_FMA;
11825 for (uint32_t n = 16; n <= 24; n += 8) {
11826 for (size_t k = 1; k <= 20; k += 5) {
11827 GemmMicrokernelTester()
11828 .mr(4)
11829 .nr(8)
11830 .kr(1)
11831 .sr(1)
11832 .m(4)
11833 .n(n)
11834 .k(k)
11835 .cn_stride(11)
11836 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11837 }
11838 }
11839 }
11840
11841 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_subtile) {
11842 TEST_REQUIRES_ARM_NEON_FMA;
11843 for (uint32_t n = 16; n <= 24; n += 8) {
11844 for (size_t k = 1; k <= 20; k += 5) {
11845 for (uint32_t m = 1; m <= 4; m++) {
11846 GemmMicrokernelTester()
11847 .mr(4)
11848 .nr(8)
11849 .kr(1)
11850 .sr(1)
11851 .m(m)
11852 .n(n)
11853 .k(k)
11854 .iterations(1)
11855 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11856 }
11857 }
11858 }
11859 }
11860
11861 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, small_kernel) {
11862 TEST_REQUIRES_ARM_NEON_FMA;
11863 for (size_t k = 1; k <= 20; k += 5) {
11864 GemmMicrokernelTester()
11865 .mr(4)
11866 .nr(8)
11867 .kr(1)
11868 .sr(1)
11869 .m(4)
11870 .n(8)
11871 .k(k)
11872 .ks(3)
11873 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11874 }
11875 }
11876
11877 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, small_kernel_subtile) {
11878 TEST_REQUIRES_ARM_NEON_FMA;
11879 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011880 for (uint32_t n = 1; n <= 8; n++) {
11881 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011882 GemmMicrokernelTester()
11883 .mr(4)
11884 .nr(8)
11885 .kr(1)
11886 .sr(1)
11887 .m(m)
11888 .n(n)
11889 .k(k)
11890 .ks(3)
11891 .iterations(1)
11892 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11893 }
11894 }
11895 }
11896 }
11897
11898 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_gt_8_small_kernel) {
11899 TEST_REQUIRES_ARM_NEON_FMA;
11900 for (uint32_t n = 9; n < 16; n++) {
11901 for (size_t k = 1; k <= 20; k += 5) {
11902 GemmMicrokernelTester()
11903 .mr(4)
11904 .nr(8)
11905 .kr(1)
11906 .sr(1)
11907 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011908 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011909 .k(k)
11910 .ks(3)
11911 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11912 }
11913 }
11914 }
11915
11916 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, n_div_8_small_kernel) {
11917 TEST_REQUIRES_ARM_NEON_FMA;
11918 for (uint32_t n = 16; n <= 24; n += 8) {
11919 for (size_t k = 1; k <= 20; k += 5) {
11920 GemmMicrokernelTester()
11921 .mr(4)
11922 .nr(8)
11923 .kr(1)
11924 .sr(1)
11925 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011926 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011927 .k(k)
11928 .ks(3)
11929 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11930 }
11931 }
11932 }
11933
11934 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cm_subtile) {
11935 TEST_REQUIRES_ARM_NEON_FMA;
11936 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011937 for (uint32_t n = 1; n <= 8; n++) {
11938 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011939 GemmMicrokernelTester()
11940 .mr(4)
11941 .nr(8)
11942 .kr(1)
11943 .sr(1)
11944 .m(m)
11945 .n(n)
11946 .k(k)
11947 .cm_stride(11)
11948 .iterations(1)
11949 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11950 }
11951 }
11952 }
11953 }
11954
11955 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, a_offset) {
11956 TEST_REQUIRES_ARM_NEON_FMA;
11957 for (size_t k = 1; k <= 20; k += 5) {
11958 GemmMicrokernelTester()
11959 .mr(4)
11960 .nr(8)
11961 .kr(1)
11962 .sr(1)
11963 .m(4)
11964 .n(8)
11965 .k(k)
11966 .ks(3)
11967 .a_offset(83)
11968 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11969 }
11970 }
11971
11972 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, zero) {
11973 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011974 for (size_t k = 1; k <= 20; k += 5) {
11975 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011976 GemmMicrokernelTester()
11977 .mr(4)
11978 .nr(8)
11979 .kr(1)
11980 .sr(1)
11981 .m(4)
11982 .n(8)
11983 .k(k)
11984 .ks(3)
11985 .a_offset(83)
11986 .zero_index(mz)
11987 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
11988 }
11989 }
11990 }
11991
11992 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, qmin) {
11993 TEST_REQUIRES_ARM_NEON_FMA;
11994 GemmMicrokernelTester()
11995 .mr(4)
11996 .nr(8)
11997 .kr(1)
11998 .sr(1)
11999 .m(4)
12000 .n(8)
12001 .k(4)
12002 .qmin(128)
12003 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12004 }
12005
12006 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, qmax) {
12007 TEST_REQUIRES_ARM_NEON_FMA;
12008 GemmMicrokernelTester()
12009 .mr(4)
12010 .nr(8)
12011 .kr(1)
12012 .sr(1)
12013 .m(4)
12014 .n(8)
12015 .k(4)
12016 .qmax(128)
12017 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12018 }
12019
12020 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD128, strided_cm) {
12021 TEST_REQUIRES_ARM_NEON_FMA;
12022 GemmMicrokernelTester()
12023 .mr(4)
12024 .nr(8)
12025 .kr(1)
12026 .sr(1)
12027 .m(4)
12028 .n(8)
12029 .k(4)
12030 .cm_stride(11)
12031 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12032 }
12033#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12034
12035
12036#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12037 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2) {
12038 TEST_REQUIRES_ARM_NEON_FMA;
12039 GemmMicrokernelTester()
12040 .mr(4)
12041 .nr(8)
12042 .kr(1)
12043 .sr(1)
12044 .m(4)
12045 .n(8)
12046 .k(2)
12047 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12048 }
12049
12050 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cn) {
12051 TEST_REQUIRES_ARM_NEON_FMA;
12052 GemmMicrokernelTester()
12053 .mr(4)
12054 .nr(8)
12055 .kr(1)
12056 .sr(1)
12057 .m(4)
12058 .n(8)
12059 .k(2)
12060 .cn_stride(11)
12061 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12062 }
12063
12064 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile) {
12065 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012066 for (uint32_t n = 1; n <= 8; n++) {
12067 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012068 GemmMicrokernelTester()
12069 .mr(4)
12070 .nr(8)
12071 .kr(1)
12072 .sr(1)
12073 .m(m)
12074 .n(n)
12075 .k(2)
12076 .iterations(1)
12077 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12078 }
12079 }
12080 }
12081
12082 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile_m) {
12083 TEST_REQUIRES_ARM_NEON_FMA;
12084 for (uint32_t m = 1; m <= 4; m++) {
12085 GemmMicrokernelTester()
12086 .mr(4)
12087 .nr(8)
12088 .kr(1)
12089 .sr(1)
12090 .m(m)
12091 .n(8)
12092 .k(2)
12093 .iterations(1)
12094 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12095 }
12096 }
12097
12098 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_eq_2_subtile_n) {
12099 TEST_REQUIRES_ARM_NEON_FMA;
12100 for (uint32_t n = 1; n <= 8; n++) {
12101 GemmMicrokernelTester()
12102 .mr(4)
12103 .nr(8)
12104 .kr(1)
12105 .sr(1)
12106 .m(4)
12107 .n(n)
12108 .k(2)
12109 .iterations(1)
12110 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12111 }
12112 }
12113
12114 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2) {
12115 TEST_REQUIRES_ARM_NEON_FMA;
12116 for (size_t k = 1; k < 2; k++) {
12117 GemmMicrokernelTester()
12118 .mr(4)
12119 .nr(8)
12120 .kr(1)
12121 .sr(1)
12122 .m(4)
12123 .n(8)
12124 .k(k)
12125 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12126 }
12127 }
12128
12129 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_lt_2_subtile) {
12130 TEST_REQUIRES_ARM_NEON_FMA;
12131 for (size_t k = 1; k < 2; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012132 for (uint32_t n = 1; n <= 8; n++) {
12133 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012134 GemmMicrokernelTester()
12135 .mr(4)
12136 .nr(8)
12137 .kr(1)
12138 .sr(1)
12139 .m(m)
12140 .n(n)
12141 .k(k)
12142 .iterations(1)
12143 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12144 }
12145 }
12146 }
12147 }
12148
12149 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2) {
12150 TEST_REQUIRES_ARM_NEON_FMA;
12151 for (size_t k = 3; k < 4; k++) {
12152 GemmMicrokernelTester()
12153 .mr(4)
12154 .nr(8)
12155 .kr(1)
12156 .sr(1)
12157 .m(4)
12158 .n(8)
12159 .k(k)
12160 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12161 }
12162 }
12163
12164 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_gt_2_subtile) {
12165 TEST_REQUIRES_ARM_NEON_FMA;
12166 for (size_t k = 3; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012167 for (uint32_t n = 1; n <= 8; n++) {
12168 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012169 GemmMicrokernelTester()
12170 .mr(4)
12171 .nr(8)
12172 .kr(1)
12173 .sr(1)
12174 .m(m)
12175 .n(n)
12176 .k(k)
12177 .iterations(1)
12178 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12179 }
12180 }
12181 }
12182 }
12183
12184 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2) {
12185 TEST_REQUIRES_ARM_NEON_FMA;
12186 for (size_t k = 4; k <= 20; k += 2) {
12187 GemmMicrokernelTester()
12188 .mr(4)
12189 .nr(8)
12190 .kr(1)
12191 .sr(1)
12192 .m(4)
12193 .n(8)
12194 .k(k)
12195 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12196 }
12197 }
12198
12199 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, k_div_2_subtile) {
12200 TEST_REQUIRES_ARM_NEON_FMA;
12201 for (size_t k = 4; k <= 20; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012202 for (uint32_t n = 1; n <= 8; n++) {
12203 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012204 GemmMicrokernelTester()
12205 .mr(4)
12206 .nr(8)
12207 .kr(1)
12208 .sr(1)
12209 .m(m)
12210 .n(n)
12211 .k(k)
12212 .iterations(1)
12213 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12214 }
12215 }
12216 }
12217 }
12218
12219 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8) {
12220 TEST_REQUIRES_ARM_NEON_FMA;
12221 for (uint32_t n = 9; n < 16; n++) {
12222 for (size_t k = 1; k <= 10; k += 3) {
12223 GemmMicrokernelTester()
12224 .mr(4)
12225 .nr(8)
12226 .kr(1)
12227 .sr(1)
12228 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012229 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012230 .k(k)
12231 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12232 }
12233 }
12234 }
12235
12236 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_strided_cn) {
12237 TEST_REQUIRES_ARM_NEON_FMA;
12238 for (uint32_t n = 9; n < 16; n++) {
12239 for (size_t k = 1; k <= 10; k += 3) {
12240 GemmMicrokernelTester()
12241 .mr(4)
12242 .nr(8)
12243 .kr(1)
12244 .sr(1)
12245 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012246 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012247 .k(k)
12248 .cn_stride(11)
12249 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12250 }
12251 }
12252 }
12253
12254 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_subtile) {
12255 TEST_REQUIRES_ARM_NEON_FMA;
12256 for (uint32_t n = 9; n < 16; n++) {
12257 for (size_t k = 1; k <= 10; k += 3) {
12258 for (uint32_t m = 1; m <= 4; m++) {
12259 GemmMicrokernelTester()
12260 .mr(4)
12261 .nr(8)
12262 .kr(1)
12263 .sr(1)
12264 .m(m)
12265 .n(n)
12266 .k(k)
12267 .iterations(1)
12268 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12269 }
12270 }
12271 }
12272 }
12273
12274 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8) {
12275 TEST_REQUIRES_ARM_NEON_FMA;
12276 for (uint32_t n = 16; n <= 24; n += 8) {
12277 for (size_t k = 1; k <= 10; k += 3) {
12278 GemmMicrokernelTester()
12279 .mr(4)
12280 .nr(8)
12281 .kr(1)
12282 .sr(1)
12283 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012284 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012285 .k(k)
12286 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12287 }
12288 }
12289 }
12290
12291 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_strided_cn) {
12292 TEST_REQUIRES_ARM_NEON_FMA;
12293 for (uint32_t n = 16; n <= 24; n += 8) {
12294 for (size_t k = 1; k <= 10; k += 3) {
12295 GemmMicrokernelTester()
12296 .mr(4)
12297 .nr(8)
12298 .kr(1)
12299 .sr(1)
12300 .m(4)
12301 .n(n)
12302 .k(k)
12303 .cn_stride(11)
12304 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12305 }
12306 }
12307 }
12308
12309 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_subtile) {
12310 TEST_REQUIRES_ARM_NEON_FMA;
12311 for (uint32_t n = 16; n <= 24; n += 8) {
12312 for (size_t k = 1; k <= 10; k += 3) {
12313 for (uint32_t m = 1; m <= 4; m++) {
12314 GemmMicrokernelTester()
12315 .mr(4)
12316 .nr(8)
12317 .kr(1)
12318 .sr(1)
12319 .m(m)
12320 .n(n)
12321 .k(k)
12322 .iterations(1)
12323 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12324 }
12325 }
12326 }
12327 }
12328
12329 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, small_kernel) {
12330 TEST_REQUIRES_ARM_NEON_FMA;
12331 for (size_t k = 1; k <= 10; k += 3) {
12332 GemmMicrokernelTester()
12333 .mr(4)
12334 .nr(8)
12335 .kr(1)
12336 .sr(1)
12337 .m(4)
12338 .n(8)
12339 .k(k)
12340 .ks(3)
12341 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12342 }
12343 }
12344
12345 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, small_kernel_subtile) {
12346 TEST_REQUIRES_ARM_NEON_FMA;
12347 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012348 for (uint32_t n = 1; n <= 8; n++) {
12349 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012350 GemmMicrokernelTester()
12351 .mr(4)
12352 .nr(8)
12353 .kr(1)
12354 .sr(1)
12355 .m(m)
12356 .n(n)
12357 .k(k)
12358 .ks(3)
12359 .iterations(1)
12360 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12361 }
12362 }
12363 }
12364 }
12365
12366 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_gt_8_small_kernel) {
12367 TEST_REQUIRES_ARM_NEON_FMA;
12368 for (uint32_t n = 9; n < 16; n++) {
12369 for (size_t k = 1; k <= 10; k += 3) {
12370 GemmMicrokernelTester()
12371 .mr(4)
12372 .nr(8)
12373 .kr(1)
12374 .sr(1)
12375 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012376 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012377 .k(k)
12378 .ks(3)
12379 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12380 }
12381 }
12382 }
12383
12384 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, n_div_8_small_kernel) {
12385 TEST_REQUIRES_ARM_NEON_FMA;
12386 for (uint32_t n = 16; n <= 24; n += 8) {
12387 for (size_t k = 1; k <= 10; k += 3) {
12388 GemmMicrokernelTester()
12389 .mr(4)
12390 .nr(8)
12391 .kr(1)
12392 .sr(1)
12393 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012394 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012395 .k(k)
12396 .ks(3)
12397 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12398 }
12399 }
12400 }
12401
12402 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cm_subtile) {
12403 TEST_REQUIRES_ARM_NEON_FMA;
12404 for (size_t k = 1; k <= 10; k += 3) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012405 for (uint32_t n = 1; n <= 8; n++) {
12406 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012407 GemmMicrokernelTester()
12408 .mr(4)
12409 .nr(8)
12410 .kr(1)
12411 .sr(1)
12412 .m(m)
12413 .n(n)
12414 .k(k)
12415 .cm_stride(11)
12416 .iterations(1)
12417 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12418 }
12419 }
12420 }
12421 }
12422
12423 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, a_offset) {
12424 TEST_REQUIRES_ARM_NEON_FMA;
12425 for (size_t k = 1; k <= 10; k += 3) {
12426 GemmMicrokernelTester()
12427 .mr(4)
12428 .nr(8)
12429 .kr(1)
12430 .sr(1)
12431 .m(4)
12432 .n(8)
12433 .k(k)
12434 .ks(3)
12435 .a_offset(43)
12436 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12437 }
12438 }
12439
12440 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, zero) {
12441 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012442 for (size_t k = 1; k <= 10; k += 3) {
12443 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012444 GemmMicrokernelTester()
12445 .mr(4)
12446 .nr(8)
12447 .kr(1)
12448 .sr(1)
12449 .m(4)
12450 .n(8)
12451 .k(k)
12452 .ks(3)
12453 .a_offset(43)
12454 .zero_index(mz)
12455 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12456 }
12457 }
12458 }
12459
12460 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, qmin) {
12461 TEST_REQUIRES_ARM_NEON_FMA;
12462 GemmMicrokernelTester()
12463 .mr(4)
12464 .nr(8)
12465 .kr(1)
12466 .sr(1)
12467 .m(4)
12468 .n(8)
12469 .k(2)
12470 .qmin(128)
12471 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12472 }
12473
12474 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, qmax) {
12475 TEST_REQUIRES_ARM_NEON_FMA;
12476 GemmMicrokernelTester()
12477 .mr(4)
12478 .nr(8)
12479 .kr(1)
12480 .sr(1)
12481 .m(4)
12482 .n(8)
12483 .k(2)
12484 .qmax(128)
12485 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12486 }
12487
12488 TEST(F32_IGEMM_MINMAX_4X8__NEONFMA_DUP_LD64, strided_cm) {
12489 TEST_REQUIRES_ARM_NEON_FMA;
12490 GemmMicrokernelTester()
12491 .mr(4)
12492 .nr(8)
12493 .kr(1)
12494 .sr(1)
12495 .m(4)
12496 .n(8)
12497 .k(2)
12498 .cm_stride(11)
12499 .Test(xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64, xnn_init_f32_minmax_scalar_params);
12500 }
12501#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12502
12503
12504#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12505 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4) {
12506 TEST_REQUIRES_ARM_NEON_FMA;
12507 GemmMicrokernelTester()
12508 .mr(6)
12509 .nr(8)
12510 .kr(1)
12511 .sr(1)
12512 .m(6)
12513 .n(8)
12514 .k(4)
12515 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12516 }
12517
12518 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cn) {
12519 TEST_REQUIRES_ARM_NEON_FMA;
12520 GemmMicrokernelTester()
12521 .mr(6)
12522 .nr(8)
12523 .kr(1)
12524 .sr(1)
12525 .m(6)
12526 .n(8)
12527 .k(4)
12528 .cn_stride(11)
12529 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12530 }
12531
12532 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile) {
12533 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012534 for (uint32_t n = 1; n <= 8; n++) {
12535 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012536 GemmMicrokernelTester()
12537 .mr(6)
12538 .nr(8)
12539 .kr(1)
12540 .sr(1)
12541 .m(m)
12542 .n(n)
12543 .k(4)
12544 .iterations(1)
12545 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12546 }
12547 }
12548 }
12549
12550 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile_m) {
12551 TEST_REQUIRES_ARM_NEON_FMA;
12552 for (uint32_t m = 1; m <= 6; m++) {
12553 GemmMicrokernelTester()
12554 .mr(6)
12555 .nr(8)
12556 .kr(1)
12557 .sr(1)
12558 .m(m)
12559 .n(8)
12560 .k(4)
12561 .iterations(1)
12562 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12563 }
12564 }
12565
12566 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_eq_4_subtile_n) {
12567 TEST_REQUIRES_ARM_NEON_FMA;
12568 for (uint32_t n = 1; n <= 8; n++) {
12569 GemmMicrokernelTester()
12570 .mr(6)
12571 .nr(8)
12572 .kr(1)
12573 .sr(1)
12574 .m(6)
12575 .n(n)
12576 .k(4)
12577 .iterations(1)
12578 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12579 }
12580 }
12581
12582 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_lt_4) {
12583 TEST_REQUIRES_ARM_NEON_FMA;
12584 for (size_t k = 1; k < 4; k++) {
12585 GemmMicrokernelTester()
12586 .mr(6)
12587 .nr(8)
12588 .kr(1)
12589 .sr(1)
12590 .m(6)
12591 .n(8)
12592 .k(k)
12593 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12594 }
12595 }
12596
12597 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_lt_4_subtile) {
12598 TEST_REQUIRES_ARM_NEON_FMA;
12599 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012600 for (uint32_t n = 1; n <= 8; n++) {
12601 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012602 GemmMicrokernelTester()
12603 .mr(6)
12604 .nr(8)
12605 .kr(1)
12606 .sr(1)
12607 .m(m)
12608 .n(n)
12609 .k(k)
12610 .iterations(1)
12611 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12612 }
12613 }
12614 }
12615 }
12616
12617 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_gt_4) {
12618 TEST_REQUIRES_ARM_NEON_FMA;
12619 for (size_t k = 5; k < 8; k++) {
12620 GemmMicrokernelTester()
12621 .mr(6)
12622 .nr(8)
12623 .kr(1)
12624 .sr(1)
12625 .m(6)
12626 .n(8)
12627 .k(k)
12628 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12629 }
12630 }
12631
12632 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_gt_4_subtile) {
12633 TEST_REQUIRES_ARM_NEON_FMA;
12634 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012635 for (uint32_t n = 1; n <= 8; n++) {
12636 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012637 GemmMicrokernelTester()
12638 .mr(6)
12639 .nr(8)
12640 .kr(1)
12641 .sr(1)
12642 .m(m)
12643 .n(n)
12644 .k(k)
12645 .iterations(1)
12646 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12647 }
12648 }
12649 }
12650 }
12651
12652 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_div_4) {
12653 TEST_REQUIRES_ARM_NEON_FMA;
12654 for (size_t k = 8; k <= 40; k += 4) {
12655 GemmMicrokernelTester()
12656 .mr(6)
12657 .nr(8)
12658 .kr(1)
12659 .sr(1)
12660 .m(6)
12661 .n(8)
12662 .k(k)
12663 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12664 }
12665 }
12666
12667 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, k_div_4_subtile) {
12668 TEST_REQUIRES_ARM_NEON_FMA;
12669 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012670 for (uint32_t n = 1; n <= 8; n++) {
12671 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012672 GemmMicrokernelTester()
12673 .mr(6)
12674 .nr(8)
12675 .kr(1)
12676 .sr(1)
12677 .m(m)
12678 .n(n)
12679 .k(k)
12680 .iterations(1)
12681 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12682 }
12683 }
12684 }
12685 }
12686
12687 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8) {
12688 TEST_REQUIRES_ARM_NEON_FMA;
12689 for (uint32_t n = 9; n < 16; n++) {
12690 for (size_t k = 1; k <= 20; k += 5) {
12691 GemmMicrokernelTester()
12692 .mr(6)
12693 .nr(8)
12694 .kr(1)
12695 .sr(1)
12696 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012697 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012698 .k(k)
12699 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12700 }
12701 }
12702 }
12703
12704 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_strided_cn) {
12705 TEST_REQUIRES_ARM_NEON_FMA;
12706 for (uint32_t n = 9; n < 16; n++) {
12707 for (size_t k = 1; k <= 20; k += 5) {
12708 GemmMicrokernelTester()
12709 .mr(6)
12710 .nr(8)
12711 .kr(1)
12712 .sr(1)
12713 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012714 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012715 .k(k)
12716 .cn_stride(11)
12717 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12718 }
12719 }
12720 }
12721
12722 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_subtile) {
12723 TEST_REQUIRES_ARM_NEON_FMA;
12724 for (uint32_t n = 9; n < 16; n++) {
12725 for (size_t k = 1; k <= 20; k += 5) {
12726 for (uint32_t m = 1; m <= 6; m++) {
12727 GemmMicrokernelTester()
12728 .mr(6)
12729 .nr(8)
12730 .kr(1)
12731 .sr(1)
12732 .m(m)
12733 .n(n)
12734 .k(k)
12735 .iterations(1)
12736 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12737 }
12738 }
12739 }
12740 }
12741
12742 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8) {
12743 TEST_REQUIRES_ARM_NEON_FMA;
12744 for (uint32_t n = 16; n <= 24; n += 8) {
12745 for (size_t k = 1; k <= 20; k += 5) {
12746 GemmMicrokernelTester()
12747 .mr(6)
12748 .nr(8)
12749 .kr(1)
12750 .sr(1)
12751 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012752 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012753 .k(k)
12754 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12755 }
12756 }
12757 }
12758
12759 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_strided_cn) {
12760 TEST_REQUIRES_ARM_NEON_FMA;
12761 for (uint32_t n = 16; n <= 24; n += 8) {
12762 for (size_t k = 1; k <= 20; k += 5) {
12763 GemmMicrokernelTester()
12764 .mr(6)
12765 .nr(8)
12766 .kr(1)
12767 .sr(1)
12768 .m(6)
12769 .n(n)
12770 .k(k)
12771 .cn_stride(11)
12772 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12773 }
12774 }
12775 }
12776
12777 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_subtile) {
12778 TEST_REQUIRES_ARM_NEON_FMA;
12779 for (uint32_t n = 16; n <= 24; n += 8) {
12780 for (size_t k = 1; k <= 20; k += 5) {
12781 for (uint32_t m = 1; m <= 6; m++) {
12782 GemmMicrokernelTester()
12783 .mr(6)
12784 .nr(8)
12785 .kr(1)
12786 .sr(1)
12787 .m(m)
12788 .n(n)
12789 .k(k)
12790 .iterations(1)
12791 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12792 }
12793 }
12794 }
12795 }
12796
12797 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, small_kernel) {
12798 TEST_REQUIRES_ARM_NEON_FMA;
12799 for (size_t k = 1; k <= 20; k += 5) {
12800 GemmMicrokernelTester()
12801 .mr(6)
12802 .nr(8)
12803 .kr(1)
12804 .sr(1)
12805 .m(6)
12806 .n(8)
12807 .k(k)
12808 .ks(3)
12809 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12810 }
12811 }
12812
12813 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, small_kernel_subtile) {
12814 TEST_REQUIRES_ARM_NEON_FMA;
12815 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012816 for (uint32_t n = 1; n <= 8; n++) {
12817 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012818 GemmMicrokernelTester()
12819 .mr(6)
12820 .nr(8)
12821 .kr(1)
12822 .sr(1)
12823 .m(m)
12824 .n(n)
12825 .k(k)
12826 .ks(3)
12827 .iterations(1)
12828 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12829 }
12830 }
12831 }
12832 }
12833
12834 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_gt_8_small_kernel) {
12835 TEST_REQUIRES_ARM_NEON_FMA;
12836 for (uint32_t n = 9; n < 16; n++) {
12837 for (size_t k = 1; k <= 20; k += 5) {
12838 GemmMicrokernelTester()
12839 .mr(6)
12840 .nr(8)
12841 .kr(1)
12842 .sr(1)
12843 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012844 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012845 .k(k)
12846 .ks(3)
12847 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12848 }
12849 }
12850 }
12851
12852 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, n_div_8_small_kernel) {
12853 TEST_REQUIRES_ARM_NEON_FMA;
12854 for (uint32_t n = 16; n <= 24; n += 8) {
12855 for (size_t k = 1; k <= 20; k += 5) {
12856 GemmMicrokernelTester()
12857 .mr(6)
12858 .nr(8)
12859 .kr(1)
12860 .sr(1)
12861 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012862 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012863 .k(k)
12864 .ks(3)
12865 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12866 }
12867 }
12868 }
12869
12870 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cm_subtile) {
12871 TEST_REQUIRES_ARM_NEON_FMA;
12872 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012873 for (uint32_t n = 1; n <= 8; n++) {
12874 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012875 GemmMicrokernelTester()
12876 .mr(6)
12877 .nr(8)
12878 .kr(1)
12879 .sr(1)
12880 .m(m)
12881 .n(n)
12882 .k(k)
12883 .cm_stride(11)
12884 .iterations(1)
12885 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12886 }
12887 }
12888 }
12889 }
12890
12891 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, a_offset) {
12892 TEST_REQUIRES_ARM_NEON_FMA;
12893 for (size_t k = 1; k <= 20; k += 5) {
12894 GemmMicrokernelTester()
12895 .mr(6)
12896 .nr(8)
12897 .kr(1)
12898 .sr(1)
12899 .m(6)
12900 .n(8)
12901 .k(k)
12902 .ks(3)
12903 .a_offset(127)
12904 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12905 }
12906 }
12907
12908 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, zero) {
12909 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012910 for (size_t k = 1; k <= 20; k += 5) {
12911 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012912 GemmMicrokernelTester()
12913 .mr(6)
12914 .nr(8)
12915 .kr(1)
12916 .sr(1)
12917 .m(6)
12918 .n(8)
12919 .k(k)
12920 .ks(3)
12921 .a_offset(127)
12922 .zero_index(mz)
12923 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12924 }
12925 }
12926 }
12927
12928 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, qmin) {
12929 TEST_REQUIRES_ARM_NEON_FMA;
12930 GemmMicrokernelTester()
12931 .mr(6)
12932 .nr(8)
12933 .kr(1)
12934 .sr(1)
12935 .m(6)
12936 .n(8)
12937 .k(4)
12938 .qmin(128)
12939 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12940 }
12941
12942 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, qmax) {
12943 TEST_REQUIRES_ARM_NEON_FMA;
12944 GemmMicrokernelTester()
12945 .mr(6)
12946 .nr(8)
12947 .kr(1)
12948 .sr(1)
12949 .m(6)
12950 .n(8)
12951 .k(4)
12952 .qmax(128)
12953 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12954 }
12955
12956 TEST(F32_IGEMM_MINMAX_6X8__NEONFMA_DUP_LD128, strided_cm) {
12957 TEST_REQUIRES_ARM_NEON_FMA;
12958 GemmMicrokernelTester()
12959 .mr(6)
12960 .nr(8)
12961 .kr(1)
12962 .sr(1)
12963 .m(6)
12964 .n(8)
12965 .k(4)
12966 .cm_stride(11)
12967 .Test(xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128, xnn_init_f32_minmax_scalar_params);
12968 }
12969#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12970
12971
12972#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12973 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_eq_4) {
12974 TEST_REQUIRES_ARM_NEON;
12975 GemmMicrokernelTester()
12976 .mr(1)
12977 .nr(8)
12978 .kr(1)
12979 .sr(4)
12980 .m(1)
12981 .n(8)
12982 .k(4)
12983 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
12984 }
12985
12986 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, strided_cn) {
12987 TEST_REQUIRES_ARM_NEON;
12988 GemmMicrokernelTester()
12989 .mr(1)
12990 .nr(8)
12991 .kr(1)
12992 .sr(4)
12993 .m(1)
12994 .n(8)
12995 .k(4)
12996 .cn_stride(11)
12997 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
12998 }
12999
13000 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_eq_4_subtile) {
13001 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013002 for (uint32_t n = 1; n <= 8; n++) {
13003 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013004 GemmMicrokernelTester()
13005 .mr(1)
13006 .nr(8)
13007 .kr(1)
13008 .sr(4)
13009 .m(m)
13010 .n(n)
13011 .k(4)
13012 .iterations(1)
13013 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13014 }
13015 }
13016 }
13017
13018 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_eq_4_subtile_m) {
13019 TEST_REQUIRES_ARM_NEON;
13020 for (uint32_t m = 1; m <= 1; m++) {
13021 GemmMicrokernelTester()
13022 .mr(1)
13023 .nr(8)
13024 .kr(1)
13025 .sr(4)
13026 .m(m)
13027 .n(8)
13028 .k(4)
13029 .iterations(1)
13030 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13031 }
13032 }
13033
13034 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_eq_4_subtile_n) {
13035 TEST_REQUIRES_ARM_NEON;
13036 for (uint32_t n = 1; n <= 8; n++) {
13037 GemmMicrokernelTester()
13038 .mr(1)
13039 .nr(8)
13040 .kr(1)
13041 .sr(4)
13042 .m(1)
13043 .n(n)
13044 .k(4)
13045 .iterations(1)
13046 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13047 }
13048 }
13049
13050 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_lt_4) {
13051 TEST_REQUIRES_ARM_NEON;
13052 for (size_t k = 1; k < 4; k++) {
13053 GemmMicrokernelTester()
13054 .mr(1)
13055 .nr(8)
13056 .kr(1)
13057 .sr(4)
13058 .m(1)
13059 .n(8)
13060 .k(k)
13061 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13062 }
13063 }
13064
13065 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_lt_4_subtile) {
13066 TEST_REQUIRES_ARM_NEON;
13067 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013068 for (uint32_t n = 1; n <= 8; n++) {
13069 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013070 GemmMicrokernelTester()
13071 .mr(1)
13072 .nr(8)
13073 .kr(1)
13074 .sr(4)
13075 .m(m)
13076 .n(n)
13077 .k(k)
13078 .iterations(1)
13079 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13080 }
13081 }
13082 }
13083 }
13084
13085 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_gt_4) {
13086 TEST_REQUIRES_ARM_NEON;
13087 for (size_t k = 5; k < 8; k++) {
13088 GemmMicrokernelTester()
13089 .mr(1)
13090 .nr(8)
13091 .kr(1)
13092 .sr(4)
13093 .m(1)
13094 .n(8)
13095 .k(k)
13096 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13097 }
13098 }
13099
13100 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_gt_4_subtile) {
13101 TEST_REQUIRES_ARM_NEON;
13102 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013103 for (uint32_t n = 1; n <= 8; n++) {
13104 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013105 GemmMicrokernelTester()
13106 .mr(1)
13107 .nr(8)
13108 .kr(1)
13109 .sr(4)
13110 .m(m)
13111 .n(n)
13112 .k(k)
13113 .iterations(1)
13114 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13115 }
13116 }
13117 }
13118 }
13119
13120 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_div_4) {
13121 TEST_REQUIRES_ARM_NEON;
13122 for (size_t k = 8; k <= 40; k += 4) {
13123 GemmMicrokernelTester()
13124 .mr(1)
13125 .nr(8)
13126 .kr(1)
13127 .sr(4)
13128 .m(1)
13129 .n(8)
13130 .k(k)
13131 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13132 }
13133 }
13134
13135 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, k_div_4_subtile) {
13136 TEST_REQUIRES_ARM_NEON;
13137 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013138 for (uint32_t n = 1; n <= 8; n++) {
13139 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013140 GemmMicrokernelTester()
13141 .mr(1)
13142 .nr(8)
13143 .kr(1)
13144 .sr(4)
13145 .m(m)
13146 .n(n)
13147 .k(k)
13148 .iterations(1)
13149 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13150 }
13151 }
13152 }
13153 }
13154
13155 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_gt_8) {
13156 TEST_REQUIRES_ARM_NEON;
13157 for (uint32_t n = 9; n < 16; n++) {
13158 for (size_t k = 1; k <= 20; k += 5) {
13159 GemmMicrokernelTester()
13160 .mr(1)
13161 .nr(8)
13162 .kr(1)
13163 .sr(4)
13164 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013165 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013166 .k(k)
13167 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13168 }
13169 }
13170 }
13171
13172 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_gt_8_strided_cn) {
13173 TEST_REQUIRES_ARM_NEON;
13174 for (uint32_t n = 9; n < 16; n++) {
13175 for (size_t k = 1; k <= 20; k += 5) {
13176 GemmMicrokernelTester()
13177 .mr(1)
13178 .nr(8)
13179 .kr(1)
13180 .sr(4)
13181 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013182 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013183 .k(k)
13184 .cn_stride(11)
13185 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13186 }
13187 }
13188 }
13189
13190 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_gt_8_subtile) {
13191 TEST_REQUIRES_ARM_NEON;
13192 for (uint32_t n = 9; n < 16; n++) {
13193 for (size_t k = 1; k <= 20; k += 5) {
13194 for (uint32_t m = 1; m <= 1; m++) {
13195 GemmMicrokernelTester()
13196 .mr(1)
13197 .nr(8)
13198 .kr(1)
13199 .sr(4)
13200 .m(m)
13201 .n(n)
13202 .k(k)
13203 .iterations(1)
13204 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13205 }
13206 }
13207 }
13208 }
13209
13210 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_div_8) {
13211 TEST_REQUIRES_ARM_NEON;
13212 for (uint32_t n = 16; n <= 24; n += 8) {
13213 for (size_t k = 1; k <= 20; k += 5) {
13214 GemmMicrokernelTester()
13215 .mr(1)
13216 .nr(8)
13217 .kr(1)
13218 .sr(4)
13219 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013220 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013221 .k(k)
13222 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13223 }
13224 }
13225 }
13226
13227 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_div_8_strided_cn) {
13228 TEST_REQUIRES_ARM_NEON;
13229 for (uint32_t n = 16; n <= 24; n += 8) {
13230 for (size_t k = 1; k <= 20; k += 5) {
13231 GemmMicrokernelTester()
13232 .mr(1)
13233 .nr(8)
13234 .kr(1)
13235 .sr(4)
13236 .m(1)
13237 .n(n)
13238 .k(k)
13239 .cn_stride(11)
13240 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13241 }
13242 }
13243 }
13244
13245 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_div_8_subtile) {
13246 TEST_REQUIRES_ARM_NEON;
13247 for (uint32_t n = 16; n <= 24; n += 8) {
13248 for (size_t k = 1; k <= 20; k += 5) {
13249 for (uint32_t m = 1; m <= 1; m++) {
13250 GemmMicrokernelTester()
13251 .mr(1)
13252 .nr(8)
13253 .kr(1)
13254 .sr(4)
13255 .m(m)
13256 .n(n)
13257 .k(k)
13258 .iterations(1)
13259 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13260 }
13261 }
13262 }
13263 }
13264
13265 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, small_kernel) {
13266 TEST_REQUIRES_ARM_NEON;
13267 for (size_t k = 1; k <= 20; k += 5) {
13268 GemmMicrokernelTester()
13269 .mr(1)
13270 .nr(8)
13271 .kr(1)
13272 .sr(4)
13273 .m(1)
13274 .n(8)
13275 .k(k)
13276 .ks(3)
13277 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13278 }
13279 }
13280
13281 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, small_kernel_subtile) {
13282 TEST_REQUIRES_ARM_NEON;
13283 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013284 for (uint32_t n = 1; n <= 8; n++) {
13285 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013286 GemmMicrokernelTester()
13287 .mr(1)
13288 .nr(8)
13289 .kr(1)
13290 .sr(4)
13291 .m(m)
13292 .n(n)
13293 .k(k)
13294 .ks(3)
13295 .iterations(1)
13296 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13297 }
13298 }
13299 }
13300 }
13301
13302 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_gt_8_small_kernel) {
13303 TEST_REQUIRES_ARM_NEON;
13304 for (uint32_t n = 9; n < 16; n++) {
13305 for (size_t k = 1; k <= 20; k += 5) {
13306 GemmMicrokernelTester()
13307 .mr(1)
13308 .nr(8)
13309 .kr(1)
13310 .sr(4)
13311 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013312 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013313 .k(k)
13314 .ks(3)
13315 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13316 }
13317 }
13318 }
13319
13320 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, n_div_8_small_kernel) {
13321 TEST_REQUIRES_ARM_NEON;
13322 for (uint32_t n = 16; n <= 24; n += 8) {
13323 for (size_t k = 1; k <= 20; k += 5) {
13324 GemmMicrokernelTester()
13325 .mr(1)
13326 .nr(8)
13327 .kr(1)
13328 .sr(4)
13329 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013330 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013331 .k(k)
13332 .ks(3)
13333 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13334 }
13335 }
13336 }
13337
13338 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, strided_cm_subtile) {
13339 TEST_REQUIRES_ARM_NEON;
13340 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013341 for (uint32_t n = 1; n <= 8; n++) {
13342 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013343 GemmMicrokernelTester()
13344 .mr(1)
13345 .nr(8)
13346 .kr(1)
13347 .sr(4)
13348 .m(m)
13349 .n(n)
13350 .k(k)
13351 .cm_stride(11)
13352 .iterations(1)
13353 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13354 }
13355 }
13356 }
13357 }
13358
13359 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, a_offset) {
13360 TEST_REQUIRES_ARM_NEON;
13361 for (size_t k = 1; k <= 20; k += 5) {
13362 GemmMicrokernelTester()
13363 .mr(1)
13364 .nr(8)
13365 .kr(1)
13366 .sr(4)
13367 .m(1)
13368 .n(8)
13369 .k(k)
13370 .ks(3)
13371 .a_offset(23)
13372 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13373 }
13374 }
13375
13376 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, zero) {
13377 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013378 for (size_t k = 1; k <= 20; k += 5) {
13379 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013380 GemmMicrokernelTester()
13381 .mr(1)
13382 .nr(8)
13383 .kr(1)
13384 .sr(4)
13385 .m(1)
13386 .n(8)
13387 .k(k)
13388 .ks(3)
13389 .a_offset(23)
13390 .zero_index(mz)
13391 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13392 }
13393 }
13394 }
13395
13396 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, qmin) {
13397 TEST_REQUIRES_ARM_NEON;
13398 GemmMicrokernelTester()
13399 .mr(1)
13400 .nr(8)
13401 .kr(1)
13402 .sr(4)
13403 .m(1)
13404 .n(8)
13405 .k(4)
13406 .qmin(128)
13407 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13408 }
13409
13410 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, qmax) {
13411 TEST_REQUIRES_ARM_NEON;
13412 GemmMicrokernelTester()
13413 .mr(1)
13414 .nr(8)
13415 .kr(1)
13416 .sr(4)
13417 .m(1)
13418 .n(8)
13419 .k(4)
13420 .qmax(128)
13421 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13422 }
13423
13424 TEST(F32_IGEMM_MINMAX_1X8S4__NEON, strided_cm) {
13425 TEST_REQUIRES_ARM_NEON;
13426 GemmMicrokernelTester()
13427 .mr(1)
13428 .nr(8)
13429 .kr(1)
13430 .sr(4)
13431 .m(1)
13432 .n(8)
13433 .k(4)
13434 .cm_stride(11)
13435 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__neon, xnn_init_f32_minmax_scalar_params);
13436 }
13437#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13438
13439
13440#if XNN_ARCH_ARM || XNN_ARCH_ARM64
13441 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_eq_4) {
13442 TEST_REQUIRES_ARM_NEON;
13443 GemmMicrokernelTester()
13444 .mr(4)
13445 .nr(8)
13446 .kr(1)
13447 .sr(4)
13448 .m(4)
13449 .n(8)
13450 .k(4)
13451 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13452 }
13453
13454 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, strided_cn) {
13455 TEST_REQUIRES_ARM_NEON;
13456 GemmMicrokernelTester()
13457 .mr(4)
13458 .nr(8)
13459 .kr(1)
13460 .sr(4)
13461 .m(4)
13462 .n(8)
13463 .k(4)
13464 .cn_stride(11)
13465 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13466 }
13467
13468 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_eq_4_subtile) {
13469 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013470 for (uint32_t n = 1; n <= 8; n++) {
13471 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013472 GemmMicrokernelTester()
13473 .mr(4)
13474 .nr(8)
13475 .kr(1)
13476 .sr(4)
13477 .m(m)
13478 .n(n)
13479 .k(4)
13480 .iterations(1)
13481 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13482 }
13483 }
13484 }
13485
13486 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_eq_4_subtile_m) {
13487 TEST_REQUIRES_ARM_NEON;
13488 for (uint32_t m = 1; m <= 4; m++) {
13489 GemmMicrokernelTester()
13490 .mr(4)
13491 .nr(8)
13492 .kr(1)
13493 .sr(4)
13494 .m(m)
13495 .n(8)
13496 .k(4)
13497 .iterations(1)
13498 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13499 }
13500 }
13501
13502 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_eq_4_subtile_n) {
13503 TEST_REQUIRES_ARM_NEON;
13504 for (uint32_t n = 1; n <= 8; n++) {
13505 GemmMicrokernelTester()
13506 .mr(4)
13507 .nr(8)
13508 .kr(1)
13509 .sr(4)
13510 .m(4)
13511 .n(n)
13512 .k(4)
13513 .iterations(1)
13514 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13515 }
13516 }
13517
13518 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_lt_4) {
13519 TEST_REQUIRES_ARM_NEON;
13520 for (size_t k = 1; k < 4; k++) {
13521 GemmMicrokernelTester()
13522 .mr(4)
13523 .nr(8)
13524 .kr(1)
13525 .sr(4)
13526 .m(4)
13527 .n(8)
13528 .k(k)
13529 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13530 }
13531 }
13532
13533 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_lt_4_subtile) {
13534 TEST_REQUIRES_ARM_NEON;
13535 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013536 for (uint32_t n = 1; n <= 8; n++) {
13537 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013538 GemmMicrokernelTester()
13539 .mr(4)
13540 .nr(8)
13541 .kr(1)
13542 .sr(4)
13543 .m(m)
13544 .n(n)
13545 .k(k)
13546 .iterations(1)
13547 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13548 }
13549 }
13550 }
13551 }
13552
13553 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_gt_4) {
13554 TEST_REQUIRES_ARM_NEON;
13555 for (size_t k = 5; k < 8; k++) {
13556 GemmMicrokernelTester()
13557 .mr(4)
13558 .nr(8)
13559 .kr(1)
13560 .sr(4)
13561 .m(4)
13562 .n(8)
13563 .k(k)
13564 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13565 }
13566 }
13567
13568 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_gt_4_subtile) {
13569 TEST_REQUIRES_ARM_NEON;
13570 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013571 for (uint32_t n = 1; n <= 8; n++) {
13572 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013573 GemmMicrokernelTester()
13574 .mr(4)
13575 .nr(8)
13576 .kr(1)
13577 .sr(4)
13578 .m(m)
13579 .n(n)
13580 .k(k)
13581 .iterations(1)
13582 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13583 }
13584 }
13585 }
13586 }
13587
13588 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_div_4) {
13589 TEST_REQUIRES_ARM_NEON;
13590 for (size_t k = 8; k <= 40; k += 4) {
13591 GemmMicrokernelTester()
13592 .mr(4)
13593 .nr(8)
13594 .kr(1)
13595 .sr(4)
13596 .m(4)
13597 .n(8)
13598 .k(k)
13599 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13600 }
13601 }
13602
13603 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, k_div_4_subtile) {
13604 TEST_REQUIRES_ARM_NEON;
13605 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013606 for (uint32_t n = 1; n <= 8; n++) {
13607 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013608 GemmMicrokernelTester()
13609 .mr(4)
13610 .nr(8)
13611 .kr(1)
13612 .sr(4)
13613 .m(m)
13614 .n(n)
13615 .k(k)
13616 .iterations(1)
13617 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13618 }
13619 }
13620 }
13621 }
13622
13623 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_gt_8) {
13624 TEST_REQUIRES_ARM_NEON;
13625 for (uint32_t n = 9; n < 16; n++) {
13626 for (size_t k = 1; k <= 20; k += 5) {
13627 GemmMicrokernelTester()
13628 .mr(4)
13629 .nr(8)
13630 .kr(1)
13631 .sr(4)
13632 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013633 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013634 .k(k)
13635 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13636 }
13637 }
13638 }
13639
13640 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_gt_8_strided_cn) {
13641 TEST_REQUIRES_ARM_NEON;
13642 for (uint32_t n = 9; n < 16; n++) {
13643 for (size_t k = 1; k <= 20; k += 5) {
13644 GemmMicrokernelTester()
13645 .mr(4)
13646 .nr(8)
13647 .kr(1)
13648 .sr(4)
13649 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013650 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013651 .k(k)
13652 .cn_stride(11)
13653 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13654 }
13655 }
13656 }
13657
13658 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_gt_8_subtile) {
13659 TEST_REQUIRES_ARM_NEON;
13660 for (uint32_t n = 9; n < 16; n++) {
13661 for (size_t k = 1; k <= 20; k += 5) {
13662 for (uint32_t m = 1; m <= 4; m++) {
13663 GemmMicrokernelTester()
13664 .mr(4)
13665 .nr(8)
13666 .kr(1)
13667 .sr(4)
13668 .m(m)
13669 .n(n)
13670 .k(k)
13671 .iterations(1)
13672 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13673 }
13674 }
13675 }
13676 }
13677
13678 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_div_8) {
13679 TEST_REQUIRES_ARM_NEON;
13680 for (uint32_t n = 16; n <= 24; n += 8) {
13681 for (size_t k = 1; k <= 20; k += 5) {
13682 GemmMicrokernelTester()
13683 .mr(4)
13684 .nr(8)
13685 .kr(1)
13686 .sr(4)
13687 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013688 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013689 .k(k)
13690 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13691 }
13692 }
13693 }
13694
13695 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_div_8_strided_cn) {
13696 TEST_REQUIRES_ARM_NEON;
13697 for (uint32_t n = 16; n <= 24; n += 8) {
13698 for (size_t k = 1; k <= 20; k += 5) {
13699 GemmMicrokernelTester()
13700 .mr(4)
13701 .nr(8)
13702 .kr(1)
13703 .sr(4)
13704 .m(4)
13705 .n(n)
13706 .k(k)
13707 .cn_stride(11)
13708 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13709 }
13710 }
13711 }
13712
13713 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_div_8_subtile) {
13714 TEST_REQUIRES_ARM_NEON;
13715 for (uint32_t n = 16; n <= 24; n += 8) {
13716 for (size_t k = 1; k <= 20; k += 5) {
13717 for (uint32_t m = 1; m <= 4; m++) {
13718 GemmMicrokernelTester()
13719 .mr(4)
13720 .nr(8)
13721 .kr(1)
13722 .sr(4)
13723 .m(m)
13724 .n(n)
13725 .k(k)
13726 .iterations(1)
13727 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13728 }
13729 }
13730 }
13731 }
13732
13733 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, small_kernel) {
13734 TEST_REQUIRES_ARM_NEON;
13735 for (size_t k = 1; k <= 20; k += 5) {
13736 GemmMicrokernelTester()
13737 .mr(4)
13738 .nr(8)
13739 .kr(1)
13740 .sr(4)
13741 .m(4)
13742 .n(8)
13743 .k(k)
13744 .ks(3)
13745 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13746 }
13747 }
13748
13749 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, small_kernel_subtile) {
13750 TEST_REQUIRES_ARM_NEON;
13751 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013752 for (uint32_t n = 1; n <= 8; n++) {
13753 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013754 GemmMicrokernelTester()
13755 .mr(4)
13756 .nr(8)
13757 .kr(1)
13758 .sr(4)
13759 .m(m)
13760 .n(n)
13761 .k(k)
13762 .ks(3)
13763 .iterations(1)
13764 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13765 }
13766 }
13767 }
13768 }
13769
13770 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_gt_8_small_kernel) {
13771 TEST_REQUIRES_ARM_NEON;
13772 for (uint32_t n = 9; n < 16; n++) {
13773 for (size_t k = 1; k <= 20; k += 5) {
13774 GemmMicrokernelTester()
13775 .mr(4)
13776 .nr(8)
13777 .kr(1)
13778 .sr(4)
13779 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013780 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013781 .k(k)
13782 .ks(3)
13783 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13784 }
13785 }
13786 }
13787
13788 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, n_div_8_small_kernel) {
13789 TEST_REQUIRES_ARM_NEON;
13790 for (uint32_t n = 16; n <= 24; n += 8) {
13791 for (size_t k = 1; k <= 20; k += 5) {
13792 GemmMicrokernelTester()
13793 .mr(4)
13794 .nr(8)
13795 .kr(1)
13796 .sr(4)
13797 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013798 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013799 .k(k)
13800 .ks(3)
13801 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13802 }
13803 }
13804 }
13805
13806 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, strided_cm_subtile) {
13807 TEST_REQUIRES_ARM_NEON;
13808 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013809 for (uint32_t n = 1; n <= 8; n++) {
13810 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013811 GemmMicrokernelTester()
13812 .mr(4)
13813 .nr(8)
13814 .kr(1)
13815 .sr(4)
13816 .m(m)
13817 .n(n)
13818 .k(k)
13819 .cm_stride(11)
13820 .iterations(1)
13821 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13822 }
13823 }
13824 }
13825 }
13826
13827 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, a_offset) {
13828 TEST_REQUIRES_ARM_NEON;
13829 for (size_t k = 1; k <= 20; k += 5) {
13830 GemmMicrokernelTester()
13831 .mr(4)
13832 .nr(8)
13833 .kr(1)
13834 .sr(4)
13835 .m(4)
13836 .n(8)
13837 .k(k)
13838 .ks(3)
13839 .a_offset(83)
13840 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13841 }
13842 }
13843
13844 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, zero) {
13845 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013846 for (size_t k = 1; k <= 20; k += 5) {
13847 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013848 GemmMicrokernelTester()
13849 .mr(4)
13850 .nr(8)
13851 .kr(1)
13852 .sr(4)
13853 .m(4)
13854 .n(8)
13855 .k(k)
13856 .ks(3)
13857 .a_offset(83)
13858 .zero_index(mz)
13859 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13860 }
13861 }
13862 }
13863
13864 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, qmin) {
13865 TEST_REQUIRES_ARM_NEON;
13866 GemmMicrokernelTester()
13867 .mr(4)
13868 .nr(8)
13869 .kr(1)
13870 .sr(4)
13871 .m(4)
13872 .n(8)
13873 .k(4)
13874 .qmin(128)
13875 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13876 }
13877
13878 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, qmax) {
13879 TEST_REQUIRES_ARM_NEON;
13880 GemmMicrokernelTester()
13881 .mr(4)
13882 .nr(8)
13883 .kr(1)
13884 .sr(4)
13885 .m(4)
13886 .n(8)
13887 .k(4)
13888 .qmax(128)
13889 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13890 }
13891
13892 TEST(F32_IGEMM_MINMAX_4X8S4__NEON, strided_cm) {
13893 TEST_REQUIRES_ARM_NEON;
13894 GemmMicrokernelTester()
13895 .mr(4)
13896 .nr(8)
13897 .kr(1)
13898 .sr(4)
13899 .m(4)
13900 .n(8)
13901 .k(4)
13902 .cm_stride(11)
13903 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neon, xnn_init_f32_minmax_scalar_params);
13904 }
13905#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13906
13907
13908#if XNN_ARCH_ARM || XNN_ARCH_ARM64
13909 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_eq_4) {
13910 TEST_REQUIRES_ARM_NEON_FMA;
13911 GemmMicrokernelTester()
13912 .mr(4)
13913 .nr(8)
13914 .kr(1)
13915 .sr(4)
13916 .m(4)
13917 .n(8)
13918 .k(4)
13919 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
13920 }
13921
13922 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, strided_cn) {
13923 TEST_REQUIRES_ARM_NEON_FMA;
13924 GemmMicrokernelTester()
13925 .mr(4)
13926 .nr(8)
13927 .kr(1)
13928 .sr(4)
13929 .m(4)
13930 .n(8)
13931 .k(4)
13932 .cn_stride(11)
13933 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
13934 }
13935
13936 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile) {
13937 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013938 for (uint32_t n = 1; n <= 8; n++) {
13939 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013940 GemmMicrokernelTester()
13941 .mr(4)
13942 .nr(8)
13943 .kr(1)
13944 .sr(4)
13945 .m(m)
13946 .n(n)
13947 .k(4)
13948 .iterations(1)
13949 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
13950 }
13951 }
13952 }
13953
13954 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile_m) {
13955 TEST_REQUIRES_ARM_NEON_FMA;
13956 for (uint32_t m = 1; m <= 4; m++) {
13957 GemmMicrokernelTester()
13958 .mr(4)
13959 .nr(8)
13960 .kr(1)
13961 .sr(4)
13962 .m(m)
13963 .n(8)
13964 .k(4)
13965 .iterations(1)
13966 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
13967 }
13968 }
13969
13970 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_eq_4_subtile_n) {
13971 TEST_REQUIRES_ARM_NEON_FMA;
13972 for (uint32_t n = 1; n <= 8; n++) {
13973 GemmMicrokernelTester()
13974 .mr(4)
13975 .nr(8)
13976 .kr(1)
13977 .sr(4)
13978 .m(4)
13979 .n(n)
13980 .k(4)
13981 .iterations(1)
13982 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
13983 }
13984 }
13985
13986 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_lt_4) {
13987 TEST_REQUIRES_ARM_NEON_FMA;
13988 for (size_t k = 1; k < 4; k++) {
13989 GemmMicrokernelTester()
13990 .mr(4)
13991 .nr(8)
13992 .kr(1)
13993 .sr(4)
13994 .m(4)
13995 .n(8)
13996 .k(k)
13997 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
13998 }
13999 }
14000
14001 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_lt_4_subtile) {
14002 TEST_REQUIRES_ARM_NEON_FMA;
14003 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014004 for (uint32_t n = 1; n <= 8; n++) {
14005 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014006 GemmMicrokernelTester()
14007 .mr(4)
14008 .nr(8)
14009 .kr(1)
14010 .sr(4)
14011 .m(m)
14012 .n(n)
14013 .k(k)
14014 .iterations(1)
14015 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14016 }
14017 }
14018 }
14019 }
14020
14021 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_gt_4) {
14022 TEST_REQUIRES_ARM_NEON_FMA;
14023 for (size_t k = 5; k < 8; k++) {
14024 GemmMicrokernelTester()
14025 .mr(4)
14026 .nr(8)
14027 .kr(1)
14028 .sr(4)
14029 .m(4)
14030 .n(8)
14031 .k(k)
14032 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14033 }
14034 }
14035
14036 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_gt_4_subtile) {
14037 TEST_REQUIRES_ARM_NEON_FMA;
14038 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014039 for (uint32_t n = 1; n <= 8; n++) {
14040 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014041 GemmMicrokernelTester()
14042 .mr(4)
14043 .nr(8)
14044 .kr(1)
14045 .sr(4)
14046 .m(m)
14047 .n(n)
14048 .k(k)
14049 .iterations(1)
14050 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14051 }
14052 }
14053 }
14054 }
14055
14056 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_div_4) {
14057 TEST_REQUIRES_ARM_NEON_FMA;
14058 for (size_t k = 8; k <= 40; k += 4) {
14059 GemmMicrokernelTester()
14060 .mr(4)
14061 .nr(8)
14062 .kr(1)
14063 .sr(4)
14064 .m(4)
14065 .n(8)
14066 .k(k)
14067 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14068 }
14069 }
14070
14071 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, k_div_4_subtile) {
14072 TEST_REQUIRES_ARM_NEON_FMA;
14073 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014074 for (uint32_t n = 1; n <= 8; n++) {
14075 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014076 GemmMicrokernelTester()
14077 .mr(4)
14078 .nr(8)
14079 .kr(1)
14080 .sr(4)
14081 .m(m)
14082 .n(n)
14083 .k(k)
14084 .iterations(1)
14085 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14086 }
14087 }
14088 }
14089 }
14090
14091 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_gt_8) {
14092 TEST_REQUIRES_ARM_NEON_FMA;
14093 for (uint32_t n = 9; n < 16; n++) {
14094 for (size_t k = 1; k <= 20; k += 5) {
14095 GemmMicrokernelTester()
14096 .mr(4)
14097 .nr(8)
14098 .kr(1)
14099 .sr(4)
14100 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014101 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014102 .k(k)
14103 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14104 }
14105 }
14106 }
14107
14108 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_gt_8_strided_cn) {
14109 TEST_REQUIRES_ARM_NEON_FMA;
14110 for (uint32_t n = 9; n < 16; n++) {
14111 for (size_t k = 1; k <= 20; k += 5) {
14112 GemmMicrokernelTester()
14113 .mr(4)
14114 .nr(8)
14115 .kr(1)
14116 .sr(4)
14117 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014118 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014119 .k(k)
14120 .cn_stride(11)
14121 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14122 }
14123 }
14124 }
14125
14126 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_gt_8_subtile) {
14127 TEST_REQUIRES_ARM_NEON_FMA;
14128 for (uint32_t n = 9; n < 16; n++) {
14129 for (size_t k = 1; k <= 20; k += 5) {
14130 for (uint32_t m = 1; m <= 4; m++) {
14131 GemmMicrokernelTester()
14132 .mr(4)
14133 .nr(8)
14134 .kr(1)
14135 .sr(4)
14136 .m(m)
14137 .n(n)
14138 .k(k)
14139 .iterations(1)
14140 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14141 }
14142 }
14143 }
14144 }
14145
14146 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_div_8) {
14147 TEST_REQUIRES_ARM_NEON_FMA;
14148 for (uint32_t n = 16; n <= 24; n += 8) {
14149 for (size_t k = 1; k <= 20; k += 5) {
14150 GemmMicrokernelTester()
14151 .mr(4)
14152 .nr(8)
14153 .kr(1)
14154 .sr(4)
14155 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014156 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014157 .k(k)
14158 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14159 }
14160 }
14161 }
14162
14163 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_div_8_strided_cn) {
14164 TEST_REQUIRES_ARM_NEON_FMA;
14165 for (uint32_t n = 16; n <= 24; n += 8) {
14166 for (size_t k = 1; k <= 20; k += 5) {
14167 GemmMicrokernelTester()
14168 .mr(4)
14169 .nr(8)
14170 .kr(1)
14171 .sr(4)
14172 .m(4)
14173 .n(n)
14174 .k(k)
14175 .cn_stride(11)
14176 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14177 }
14178 }
14179 }
14180
14181 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_div_8_subtile) {
14182 TEST_REQUIRES_ARM_NEON_FMA;
14183 for (uint32_t n = 16; n <= 24; n += 8) {
14184 for (size_t k = 1; k <= 20; k += 5) {
14185 for (uint32_t m = 1; m <= 4; m++) {
14186 GemmMicrokernelTester()
14187 .mr(4)
14188 .nr(8)
14189 .kr(1)
14190 .sr(4)
14191 .m(m)
14192 .n(n)
14193 .k(k)
14194 .iterations(1)
14195 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14196 }
14197 }
14198 }
14199 }
14200
14201 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, small_kernel) {
14202 TEST_REQUIRES_ARM_NEON_FMA;
14203 for (size_t k = 1; k <= 20; k += 5) {
14204 GemmMicrokernelTester()
14205 .mr(4)
14206 .nr(8)
14207 .kr(1)
14208 .sr(4)
14209 .m(4)
14210 .n(8)
14211 .k(k)
14212 .ks(3)
14213 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14214 }
14215 }
14216
14217 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, small_kernel_subtile) {
14218 TEST_REQUIRES_ARM_NEON_FMA;
14219 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014220 for (uint32_t n = 1; n <= 8; n++) {
14221 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014222 GemmMicrokernelTester()
14223 .mr(4)
14224 .nr(8)
14225 .kr(1)
14226 .sr(4)
14227 .m(m)
14228 .n(n)
14229 .k(k)
14230 .ks(3)
14231 .iterations(1)
14232 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14233 }
14234 }
14235 }
14236 }
14237
14238 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_gt_8_small_kernel) {
14239 TEST_REQUIRES_ARM_NEON_FMA;
14240 for (uint32_t n = 9; n < 16; n++) {
14241 for (size_t k = 1; k <= 20; k += 5) {
14242 GemmMicrokernelTester()
14243 .mr(4)
14244 .nr(8)
14245 .kr(1)
14246 .sr(4)
14247 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014248 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014249 .k(k)
14250 .ks(3)
14251 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14252 }
14253 }
14254 }
14255
14256 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, n_div_8_small_kernel) {
14257 TEST_REQUIRES_ARM_NEON_FMA;
14258 for (uint32_t n = 16; n <= 24; n += 8) {
14259 for (size_t k = 1; k <= 20; k += 5) {
14260 GemmMicrokernelTester()
14261 .mr(4)
14262 .nr(8)
14263 .kr(1)
14264 .sr(4)
14265 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014266 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014267 .k(k)
14268 .ks(3)
14269 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14270 }
14271 }
14272 }
14273
14274 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, strided_cm_subtile) {
14275 TEST_REQUIRES_ARM_NEON_FMA;
14276 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014277 for (uint32_t n = 1; n <= 8; n++) {
14278 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014279 GemmMicrokernelTester()
14280 .mr(4)
14281 .nr(8)
14282 .kr(1)
14283 .sr(4)
14284 .m(m)
14285 .n(n)
14286 .k(k)
14287 .cm_stride(11)
14288 .iterations(1)
14289 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14290 }
14291 }
14292 }
14293 }
14294
14295 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, a_offset) {
14296 TEST_REQUIRES_ARM_NEON_FMA;
14297 for (size_t k = 1; k <= 20; k += 5) {
14298 GemmMicrokernelTester()
14299 .mr(4)
14300 .nr(8)
14301 .kr(1)
14302 .sr(4)
14303 .m(4)
14304 .n(8)
14305 .k(k)
14306 .ks(3)
14307 .a_offset(83)
14308 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14309 }
14310 }
14311
14312 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, zero) {
14313 TEST_REQUIRES_ARM_NEON_FMA;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014314 for (size_t k = 1; k <= 20; k += 5) {
14315 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014316 GemmMicrokernelTester()
14317 .mr(4)
14318 .nr(8)
14319 .kr(1)
14320 .sr(4)
14321 .m(4)
14322 .n(8)
14323 .k(k)
14324 .ks(3)
14325 .a_offset(83)
14326 .zero_index(mz)
14327 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14328 }
14329 }
14330 }
14331
14332 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, qmin) {
14333 TEST_REQUIRES_ARM_NEON_FMA;
14334 GemmMicrokernelTester()
14335 .mr(4)
14336 .nr(8)
14337 .kr(1)
14338 .sr(4)
14339 .m(4)
14340 .n(8)
14341 .k(4)
14342 .qmin(128)
14343 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14344 }
14345
14346 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, qmax) {
14347 TEST_REQUIRES_ARM_NEON_FMA;
14348 GemmMicrokernelTester()
14349 .mr(4)
14350 .nr(8)
14351 .kr(1)
14352 .sr(4)
14353 .m(4)
14354 .n(8)
14355 .k(4)
14356 .qmax(128)
14357 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14358 }
14359
14360 TEST(F32_IGEMM_MINMAX_4X8S4__NEONFMA, strided_cm) {
14361 TEST_REQUIRES_ARM_NEON_FMA;
14362 GemmMicrokernelTester()
14363 .mr(4)
14364 .nr(8)
14365 .kr(1)
14366 .sr(4)
14367 .m(4)
14368 .n(8)
14369 .k(4)
14370 .cm_stride(11)
14371 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma, xnn_init_f32_minmax_scalar_params);
14372 }
14373#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14374
14375
14376#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14377 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_eq_1) {
14378 TEST_REQUIRES_X86_SSE;
14379 GemmMicrokernelTester()
14380 .mr(1)
14381 .nr(8)
14382 .kr(1)
14383 .sr(1)
14384 .m(1)
14385 .n(8)
14386 .k(1)
14387 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14388 }
14389
14390 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, strided_cn) {
14391 TEST_REQUIRES_X86_SSE;
14392 GemmMicrokernelTester()
14393 .mr(1)
14394 .nr(8)
14395 .kr(1)
14396 .sr(1)
14397 .m(1)
14398 .n(8)
14399 .k(1)
14400 .cn_stride(11)
14401 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14402 }
14403
14404 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile) {
14405 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014406 for (uint32_t n = 1; n <= 8; n++) {
14407 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014408 GemmMicrokernelTester()
14409 .mr(1)
14410 .nr(8)
14411 .kr(1)
14412 .sr(1)
14413 .m(m)
14414 .n(n)
14415 .k(1)
14416 .iterations(1)
14417 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14418 }
14419 }
14420 }
14421
14422 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile_m) {
14423 TEST_REQUIRES_X86_SSE;
14424 for (uint32_t m = 1; m <= 1; m++) {
14425 GemmMicrokernelTester()
14426 .mr(1)
14427 .nr(8)
14428 .kr(1)
14429 .sr(1)
14430 .m(m)
14431 .n(8)
14432 .k(1)
14433 .iterations(1)
14434 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14435 }
14436 }
14437
14438 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_eq_1_subtile_n) {
14439 TEST_REQUIRES_X86_SSE;
14440 for (uint32_t n = 1; n <= 8; n++) {
14441 GemmMicrokernelTester()
14442 .mr(1)
14443 .nr(8)
14444 .kr(1)
14445 .sr(1)
14446 .m(1)
14447 .n(n)
14448 .k(1)
14449 .iterations(1)
14450 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14451 }
14452 }
14453
14454 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_gt_1) {
14455 TEST_REQUIRES_X86_SSE;
14456 for (size_t k = 2; k < 10; k++) {
14457 GemmMicrokernelTester()
14458 .mr(1)
14459 .nr(8)
14460 .kr(1)
14461 .sr(1)
14462 .m(1)
14463 .n(8)
14464 .k(k)
14465 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14466 }
14467 }
14468
14469 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, k_gt_1_subtile) {
14470 TEST_REQUIRES_X86_SSE;
14471 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014472 for (uint32_t n = 1; n <= 8; n++) {
14473 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014474 GemmMicrokernelTester()
14475 .mr(1)
14476 .nr(8)
14477 .kr(1)
14478 .sr(1)
14479 .m(m)
14480 .n(n)
14481 .k(k)
14482 .iterations(1)
14483 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14484 }
14485 }
14486 }
14487 }
14488
14489 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_gt_8) {
14490 TEST_REQUIRES_X86_SSE;
14491 for (uint32_t n = 9; n < 16; n++) {
14492 for (size_t k = 1; k <= 5; k += 2) {
14493 GemmMicrokernelTester()
14494 .mr(1)
14495 .nr(8)
14496 .kr(1)
14497 .sr(1)
14498 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014499 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014500 .k(k)
14501 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14502 }
14503 }
14504 }
14505
14506 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_gt_8_strided_cn) {
14507 TEST_REQUIRES_X86_SSE;
14508 for (uint32_t n = 9; n < 16; n++) {
14509 for (size_t k = 1; k <= 5; k += 2) {
14510 GemmMicrokernelTester()
14511 .mr(1)
14512 .nr(8)
14513 .kr(1)
14514 .sr(1)
14515 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014516 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014517 .k(k)
14518 .cn_stride(11)
14519 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14520 }
14521 }
14522 }
14523
14524 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_gt_8_subtile) {
14525 TEST_REQUIRES_X86_SSE;
14526 for (uint32_t n = 9; n < 16; n++) {
14527 for (size_t k = 1; k <= 5; k += 2) {
14528 for (uint32_t m = 1; m <= 1; m++) {
14529 GemmMicrokernelTester()
14530 .mr(1)
14531 .nr(8)
14532 .kr(1)
14533 .sr(1)
14534 .m(m)
14535 .n(n)
14536 .k(k)
14537 .iterations(1)
14538 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14539 }
14540 }
14541 }
14542 }
14543
14544 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_div_8) {
14545 TEST_REQUIRES_X86_SSE;
14546 for (uint32_t n = 16; n <= 24; n += 8) {
14547 for (size_t k = 1; k <= 5; k += 2) {
14548 GemmMicrokernelTester()
14549 .mr(1)
14550 .nr(8)
14551 .kr(1)
14552 .sr(1)
14553 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014554 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014555 .k(k)
14556 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14557 }
14558 }
14559 }
14560
14561 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_div_8_strided_cn) {
14562 TEST_REQUIRES_X86_SSE;
14563 for (uint32_t n = 16; n <= 24; n += 8) {
14564 for (size_t k = 1; k <= 5; k += 2) {
14565 GemmMicrokernelTester()
14566 .mr(1)
14567 .nr(8)
14568 .kr(1)
14569 .sr(1)
14570 .m(1)
14571 .n(n)
14572 .k(k)
14573 .cn_stride(11)
14574 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14575 }
14576 }
14577 }
14578
14579 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_div_8_subtile) {
14580 TEST_REQUIRES_X86_SSE;
14581 for (uint32_t n = 16; n <= 24; n += 8) {
14582 for (size_t k = 1; k <= 5; k += 2) {
14583 for (uint32_t m = 1; m <= 1; m++) {
14584 GemmMicrokernelTester()
14585 .mr(1)
14586 .nr(8)
14587 .kr(1)
14588 .sr(1)
14589 .m(m)
14590 .n(n)
14591 .k(k)
14592 .iterations(1)
14593 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14594 }
14595 }
14596 }
14597 }
14598
14599 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, small_kernel) {
14600 TEST_REQUIRES_X86_SSE;
14601 for (size_t k = 1; k <= 5; k += 2) {
14602 GemmMicrokernelTester()
14603 .mr(1)
14604 .nr(8)
14605 .kr(1)
14606 .sr(1)
14607 .m(1)
14608 .n(8)
14609 .k(k)
14610 .ks(3)
14611 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14612 }
14613 }
14614
14615 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, small_kernel_subtile) {
14616 TEST_REQUIRES_X86_SSE;
14617 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014618 for (uint32_t n = 1; n <= 8; n++) {
14619 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014620 GemmMicrokernelTester()
14621 .mr(1)
14622 .nr(8)
14623 .kr(1)
14624 .sr(1)
14625 .m(m)
14626 .n(n)
14627 .k(k)
14628 .ks(3)
14629 .iterations(1)
14630 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14631 }
14632 }
14633 }
14634 }
14635
14636 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_gt_8_small_kernel) {
14637 TEST_REQUIRES_X86_SSE;
14638 for (uint32_t n = 9; n < 16; n++) {
14639 for (size_t k = 1; k <= 5; k += 2) {
14640 GemmMicrokernelTester()
14641 .mr(1)
14642 .nr(8)
14643 .kr(1)
14644 .sr(1)
14645 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014646 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014647 .k(k)
14648 .ks(3)
14649 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14650 }
14651 }
14652 }
14653
14654 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, n_div_8_small_kernel) {
14655 TEST_REQUIRES_X86_SSE;
14656 for (uint32_t n = 16; n <= 24; n += 8) {
14657 for (size_t k = 1; k <= 5; k += 2) {
14658 GemmMicrokernelTester()
14659 .mr(1)
14660 .nr(8)
14661 .kr(1)
14662 .sr(1)
14663 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014664 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014665 .k(k)
14666 .ks(3)
14667 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14668 }
14669 }
14670 }
14671
14672 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, strided_cm_subtile) {
14673 TEST_REQUIRES_X86_SSE;
14674 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014675 for (uint32_t n = 1; n <= 8; n++) {
14676 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014677 GemmMicrokernelTester()
14678 .mr(1)
14679 .nr(8)
14680 .kr(1)
14681 .sr(1)
14682 .m(m)
14683 .n(n)
14684 .k(k)
14685 .cm_stride(11)
14686 .iterations(1)
14687 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14688 }
14689 }
14690 }
14691 }
14692
14693 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, a_offset) {
14694 TEST_REQUIRES_X86_SSE;
14695 for (size_t k = 1; k <= 5; k += 2) {
14696 GemmMicrokernelTester()
14697 .mr(1)
14698 .nr(8)
14699 .kr(1)
14700 .sr(1)
14701 .m(1)
14702 .n(8)
14703 .k(k)
14704 .ks(3)
14705 .a_offset(7)
14706 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14707 }
14708 }
14709
14710 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, zero) {
14711 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014712 for (size_t k = 1; k <= 5; k += 2) {
14713 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014714 GemmMicrokernelTester()
14715 .mr(1)
14716 .nr(8)
14717 .kr(1)
14718 .sr(1)
14719 .m(1)
14720 .n(8)
14721 .k(k)
14722 .ks(3)
14723 .a_offset(7)
14724 .zero_index(mz)
14725 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14726 }
14727 }
14728 }
14729
14730 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, qmin) {
14731 TEST_REQUIRES_X86_SSE;
14732 GemmMicrokernelTester()
14733 .mr(1)
14734 .nr(8)
14735 .kr(1)
14736 .sr(1)
14737 .m(1)
14738 .n(8)
14739 .k(1)
14740 .qmin(128)
14741 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14742 }
14743
14744 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, qmax) {
14745 TEST_REQUIRES_X86_SSE;
14746 GemmMicrokernelTester()
14747 .mr(1)
14748 .nr(8)
14749 .kr(1)
14750 .sr(1)
14751 .m(1)
14752 .n(8)
14753 .k(1)
14754 .qmax(128)
14755 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14756 }
14757
14758 TEST(F32_IGEMM_MINMAX_1X8__SSE_LOAD1, strided_cm) {
14759 TEST_REQUIRES_X86_SSE;
14760 GemmMicrokernelTester()
14761 .mr(1)
14762 .nr(8)
14763 .kr(1)
14764 .sr(1)
14765 .m(1)
14766 .n(8)
14767 .k(1)
14768 .cm_stride(11)
14769 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, xnn_init_f32_minmax_sse_params);
14770 }
14771#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14772
14773
14774#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14775 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_eq_4) {
14776 TEST_REQUIRES_X86_SSE;
14777 GemmMicrokernelTester()
14778 .mr(1)
14779 .nr(8)
14780 .kr(1)
14781 .sr(1)
14782 .m(1)
14783 .n(8)
14784 .k(4)
14785 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14786 }
14787
14788 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, strided_cn) {
14789 TEST_REQUIRES_X86_SSE;
14790 GemmMicrokernelTester()
14791 .mr(1)
14792 .nr(8)
14793 .kr(1)
14794 .sr(1)
14795 .m(1)
14796 .n(8)
14797 .k(4)
14798 .cn_stride(11)
14799 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14800 }
14801
14802 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_eq_4_subtile) {
14803 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014804 for (uint32_t n = 1; n <= 8; n++) {
14805 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014806 GemmMicrokernelTester()
14807 .mr(1)
14808 .nr(8)
14809 .kr(1)
14810 .sr(1)
14811 .m(m)
14812 .n(n)
14813 .k(4)
14814 .iterations(1)
14815 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14816 }
14817 }
14818 }
14819
14820 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_eq_4_subtile_m) {
14821 TEST_REQUIRES_X86_SSE;
14822 for (uint32_t m = 1; m <= 1; m++) {
14823 GemmMicrokernelTester()
14824 .mr(1)
14825 .nr(8)
14826 .kr(1)
14827 .sr(1)
14828 .m(m)
14829 .n(8)
14830 .k(4)
14831 .iterations(1)
14832 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14833 }
14834 }
14835
14836 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_eq_4_subtile_n) {
14837 TEST_REQUIRES_X86_SSE;
14838 for (uint32_t n = 1; n <= 8; n++) {
14839 GemmMicrokernelTester()
14840 .mr(1)
14841 .nr(8)
14842 .kr(1)
14843 .sr(1)
14844 .m(1)
14845 .n(n)
14846 .k(4)
14847 .iterations(1)
14848 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14849 }
14850 }
14851
14852 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_lt_4) {
14853 TEST_REQUIRES_X86_SSE;
14854 for (size_t k = 1; k < 4; k++) {
14855 GemmMicrokernelTester()
14856 .mr(1)
14857 .nr(8)
14858 .kr(1)
14859 .sr(1)
14860 .m(1)
14861 .n(8)
14862 .k(k)
14863 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14864 }
14865 }
14866
14867 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_lt_4_subtile) {
14868 TEST_REQUIRES_X86_SSE;
14869 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014870 for (uint32_t n = 1; n <= 8; n++) {
14871 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014872 GemmMicrokernelTester()
14873 .mr(1)
14874 .nr(8)
14875 .kr(1)
14876 .sr(1)
14877 .m(m)
14878 .n(n)
14879 .k(k)
14880 .iterations(1)
14881 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14882 }
14883 }
14884 }
14885 }
14886
14887 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_gt_4) {
14888 TEST_REQUIRES_X86_SSE;
14889 for (size_t k = 5; k < 8; k++) {
14890 GemmMicrokernelTester()
14891 .mr(1)
14892 .nr(8)
14893 .kr(1)
14894 .sr(1)
14895 .m(1)
14896 .n(8)
14897 .k(k)
14898 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14899 }
14900 }
14901
14902 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_gt_4_subtile) {
14903 TEST_REQUIRES_X86_SSE;
14904 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014905 for (uint32_t n = 1; n <= 8; n++) {
14906 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014907 GemmMicrokernelTester()
14908 .mr(1)
14909 .nr(8)
14910 .kr(1)
14911 .sr(1)
14912 .m(m)
14913 .n(n)
14914 .k(k)
14915 .iterations(1)
14916 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14917 }
14918 }
14919 }
14920 }
14921
14922 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_div_4) {
14923 TEST_REQUIRES_X86_SSE;
14924 for (size_t k = 8; k <= 40; k += 4) {
14925 GemmMicrokernelTester()
14926 .mr(1)
14927 .nr(8)
14928 .kr(1)
14929 .sr(1)
14930 .m(1)
14931 .n(8)
14932 .k(k)
14933 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14934 }
14935 }
14936
14937 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, k_div_4_subtile) {
14938 TEST_REQUIRES_X86_SSE;
14939 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014940 for (uint32_t n = 1; n <= 8; n++) {
14941 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014942 GemmMicrokernelTester()
14943 .mr(1)
14944 .nr(8)
14945 .kr(1)
14946 .sr(1)
14947 .m(m)
14948 .n(n)
14949 .k(k)
14950 .iterations(1)
14951 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14952 }
14953 }
14954 }
14955 }
14956
14957 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_gt_8) {
14958 TEST_REQUIRES_X86_SSE;
14959 for (uint32_t n = 9; n < 16; n++) {
14960 for (size_t k = 1; k <= 20; k += 5) {
14961 GemmMicrokernelTester()
14962 .mr(1)
14963 .nr(8)
14964 .kr(1)
14965 .sr(1)
14966 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014967 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014968 .k(k)
14969 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14970 }
14971 }
14972 }
14973
14974 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_gt_8_strided_cn) {
14975 TEST_REQUIRES_X86_SSE;
14976 for (uint32_t n = 9; n < 16; n++) {
14977 for (size_t k = 1; k <= 20; k += 5) {
14978 GemmMicrokernelTester()
14979 .mr(1)
14980 .nr(8)
14981 .kr(1)
14982 .sr(1)
14983 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014984 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014985 .k(k)
14986 .cn_stride(11)
14987 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
14988 }
14989 }
14990 }
14991
14992 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_gt_8_subtile) {
14993 TEST_REQUIRES_X86_SSE;
14994 for (uint32_t n = 9; n < 16; n++) {
14995 for (size_t k = 1; k <= 20; k += 5) {
14996 for (uint32_t m = 1; m <= 1; m++) {
14997 GemmMicrokernelTester()
14998 .mr(1)
14999 .nr(8)
15000 .kr(1)
15001 .sr(1)
15002 .m(m)
15003 .n(n)
15004 .k(k)
15005 .iterations(1)
15006 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15007 }
15008 }
15009 }
15010 }
15011
15012 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_div_8) {
15013 TEST_REQUIRES_X86_SSE;
15014 for (uint32_t n = 16; n <= 24; n += 8) {
15015 for (size_t k = 1; k <= 20; k += 5) {
15016 GemmMicrokernelTester()
15017 .mr(1)
15018 .nr(8)
15019 .kr(1)
15020 .sr(1)
15021 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015022 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015023 .k(k)
15024 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15025 }
15026 }
15027 }
15028
15029 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_div_8_strided_cn) {
15030 TEST_REQUIRES_X86_SSE;
15031 for (uint32_t n = 16; n <= 24; n += 8) {
15032 for (size_t k = 1; k <= 20; k += 5) {
15033 GemmMicrokernelTester()
15034 .mr(1)
15035 .nr(8)
15036 .kr(1)
15037 .sr(1)
15038 .m(1)
15039 .n(n)
15040 .k(k)
15041 .cn_stride(11)
15042 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15043 }
15044 }
15045 }
15046
15047 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_div_8_subtile) {
15048 TEST_REQUIRES_X86_SSE;
15049 for (uint32_t n = 16; n <= 24; n += 8) {
15050 for (size_t k = 1; k <= 20; k += 5) {
15051 for (uint32_t m = 1; m <= 1; m++) {
15052 GemmMicrokernelTester()
15053 .mr(1)
15054 .nr(8)
15055 .kr(1)
15056 .sr(1)
15057 .m(m)
15058 .n(n)
15059 .k(k)
15060 .iterations(1)
15061 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15062 }
15063 }
15064 }
15065 }
15066
15067 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, small_kernel) {
15068 TEST_REQUIRES_X86_SSE;
15069 for (size_t k = 1; k <= 20; k += 5) {
15070 GemmMicrokernelTester()
15071 .mr(1)
15072 .nr(8)
15073 .kr(1)
15074 .sr(1)
15075 .m(1)
15076 .n(8)
15077 .k(k)
15078 .ks(3)
15079 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15080 }
15081 }
15082
15083 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, small_kernel_subtile) {
15084 TEST_REQUIRES_X86_SSE;
15085 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015086 for (uint32_t n = 1; n <= 8; n++) {
15087 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015088 GemmMicrokernelTester()
15089 .mr(1)
15090 .nr(8)
15091 .kr(1)
15092 .sr(1)
15093 .m(m)
15094 .n(n)
15095 .k(k)
15096 .ks(3)
15097 .iterations(1)
15098 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15099 }
15100 }
15101 }
15102 }
15103
15104 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_gt_8_small_kernel) {
15105 TEST_REQUIRES_X86_SSE;
15106 for (uint32_t n = 9; n < 16; n++) {
15107 for (size_t k = 1; k <= 20; k += 5) {
15108 GemmMicrokernelTester()
15109 .mr(1)
15110 .nr(8)
15111 .kr(1)
15112 .sr(1)
15113 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015114 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015115 .k(k)
15116 .ks(3)
15117 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15118 }
15119 }
15120 }
15121
15122 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, n_div_8_small_kernel) {
15123 TEST_REQUIRES_X86_SSE;
15124 for (uint32_t n = 16; n <= 24; n += 8) {
15125 for (size_t k = 1; k <= 20; k += 5) {
15126 GemmMicrokernelTester()
15127 .mr(1)
15128 .nr(8)
15129 .kr(1)
15130 .sr(1)
15131 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015132 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015133 .k(k)
15134 .ks(3)
15135 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15136 }
15137 }
15138 }
15139
15140 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, strided_cm_subtile) {
15141 TEST_REQUIRES_X86_SSE;
15142 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015143 for (uint32_t n = 1; n <= 8; n++) {
15144 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015145 GemmMicrokernelTester()
15146 .mr(1)
15147 .nr(8)
15148 .kr(1)
15149 .sr(1)
15150 .m(m)
15151 .n(n)
15152 .k(k)
15153 .cm_stride(11)
15154 .iterations(1)
15155 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15156 }
15157 }
15158 }
15159 }
15160
15161 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, a_offset) {
15162 TEST_REQUIRES_X86_SSE;
15163 for (size_t k = 1; k <= 20; k += 5) {
15164 GemmMicrokernelTester()
15165 .mr(1)
15166 .nr(8)
15167 .kr(1)
15168 .sr(1)
15169 .m(1)
15170 .n(8)
15171 .k(k)
15172 .ks(3)
15173 .a_offset(23)
15174 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15175 }
15176 }
15177
15178 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, zero) {
15179 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015180 for (size_t k = 1; k <= 20; k += 5) {
15181 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015182 GemmMicrokernelTester()
15183 .mr(1)
15184 .nr(8)
15185 .kr(1)
15186 .sr(1)
15187 .m(1)
15188 .n(8)
15189 .k(k)
15190 .ks(3)
15191 .a_offset(23)
15192 .zero_index(mz)
15193 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15194 }
15195 }
15196 }
15197
15198 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, qmin) {
15199 TEST_REQUIRES_X86_SSE;
15200 GemmMicrokernelTester()
15201 .mr(1)
15202 .nr(8)
15203 .kr(1)
15204 .sr(1)
15205 .m(1)
15206 .n(8)
15207 .k(4)
15208 .qmin(128)
15209 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15210 }
15211
15212 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, qmax) {
15213 TEST_REQUIRES_X86_SSE;
15214 GemmMicrokernelTester()
15215 .mr(1)
15216 .nr(8)
15217 .kr(1)
15218 .sr(1)
15219 .m(1)
15220 .n(8)
15221 .k(4)
15222 .qmax(128)
15223 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15224 }
15225
15226 TEST(F32_IGEMM_MINMAX_1X8__SSE_DUP, strided_cm) {
15227 TEST_REQUIRES_X86_SSE;
15228 GemmMicrokernelTester()
15229 .mr(1)
15230 .nr(8)
15231 .kr(1)
15232 .sr(1)
15233 .m(1)
15234 .n(8)
15235 .k(4)
15236 .cm_stride(11)
15237 .Test(xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, xnn_init_f32_minmax_sse_params);
15238 }
15239#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15240
15241
15242#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15243 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_eq_4) {
15244 TEST_REQUIRES_X86_SSE;
15245 GemmMicrokernelTester()
15246 .mr(4)
15247 .nr(8)
15248 .kr(1)
15249 .sr(1)
15250 .m(4)
15251 .n(8)
15252 .k(4)
15253 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15254 }
15255
15256 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, strided_cn) {
15257 TEST_REQUIRES_X86_SSE;
15258 GemmMicrokernelTester()
15259 .mr(4)
15260 .nr(8)
15261 .kr(1)
15262 .sr(1)
15263 .m(4)
15264 .n(8)
15265 .k(4)
15266 .cn_stride(11)
15267 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15268 }
15269
15270 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_eq_4_subtile) {
15271 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015272 for (uint32_t n = 1; n <= 8; n++) {
15273 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015274 GemmMicrokernelTester()
15275 .mr(4)
15276 .nr(8)
15277 .kr(1)
15278 .sr(1)
15279 .m(m)
15280 .n(n)
15281 .k(4)
15282 .iterations(1)
15283 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15284 }
15285 }
15286 }
15287
15288 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_eq_4_subtile_m) {
15289 TEST_REQUIRES_X86_SSE;
15290 for (uint32_t m = 1; m <= 4; m++) {
15291 GemmMicrokernelTester()
15292 .mr(4)
15293 .nr(8)
15294 .kr(1)
15295 .sr(1)
15296 .m(m)
15297 .n(8)
15298 .k(4)
15299 .iterations(1)
15300 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15301 }
15302 }
15303
15304 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_eq_4_subtile_n) {
15305 TEST_REQUIRES_X86_SSE;
15306 for (uint32_t n = 1; n <= 8; n++) {
15307 GemmMicrokernelTester()
15308 .mr(4)
15309 .nr(8)
15310 .kr(1)
15311 .sr(1)
15312 .m(4)
15313 .n(n)
15314 .k(4)
15315 .iterations(1)
15316 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15317 }
15318 }
15319
15320 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_lt_4) {
15321 TEST_REQUIRES_X86_SSE;
15322 for (size_t k = 1; k < 4; k++) {
15323 GemmMicrokernelTester()
15324 .mr(4)
15325 .nr(8)
15326 .kr(1)
15327 .sr(1)
15328 .m(4)
15329 .n(8)
15330 .k(k)
15331 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15332 }
15333 }
15334
15335 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_lt_4_subtile) {
15336 TEST_REQUIRES_X86_SSE;
15337 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015338 for (uint32_t n = 1; n <= 8; n++) {
15339 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015340 GemmMicrokernelTester()
15341 .mr(4)
15342 .nr(8)
15343 .kr(1)
15344 .sr(1)
15345 .m(m)
15346 .n(n)
15347 .k(k)
15348 .iterations(1)
15349 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15350 }
15351 }
15352 }
15353 }
15354
15355 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_gt_4) {
15356 TEST_REQUIRES_X86_SSE;
15357 for (size_t k = 5; k < 8; k++) {
15358 GemmMicrokernelTester()
15359 .mr(4)
15360 .nr(8)
15361 .kr(1)
15362 .sr(1)
15363 .m(4)
15364 .n(8)
15365 .k(k)
15366 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15367 }
15368 }
15369
15370 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_gt_4_subtile) {
15371 TEST_REQUIRES_X86_SSE;
15372 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015373 for (uint32_t n = 1; n <= 8; n++) {
15374 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015375 GemmMicrokernelTester()
15376 .mr(4)
15377 .nr(8)
15378 .kr(1)
15379 .sr(1)
15380 .m(m)
15381 .n(n)
15382 .k(k)
15383 .iterations(1)
15384 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15385 }
15386 }
15387 }
15388 }
15389
15390 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_div_4) {
15391 TEST_REQUIRES_X86_SSE;
15392 for (size_t k = 8; k <= 40; k += 4) {
15393 GemmMicrokernelTester()
15394 .mr(4)
15395 .nr(8)
15396 .kr(1)
15397 .sr(1)
15398 .m(4)
15399 .n(8)
15400 .k(k)
15401 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15402 }
15403 }
15404
15405 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, k_div_4_subtile) {
15406 TEST_REQUIRES_X86_SSE;
15407 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015408 for (uint32_t n = 1; n <= 8; n++) {
15409 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015410 GemmMicrokernelTester()
15411 .mr(4)
15412 .nr(8)
15413 .kr(1)
15414 .sr(1)
15415 .m(m)
15416 .n(n)
15417 .k(k)
15418 .iterations(1)
15419 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15420 }
15421 }
15422 }
15423 }
15424
15425 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_gt_8) {
15426 TEST_REQUIRES_X86_SSE;
15427 for (uint32_t n = 9; n < 16; n++) {
15428 for (size_t k = 1; k <= 20; k += 5) {
15429 GemmMicrokernelTester()
15430 .mr(4)
15431 .nr(8)
15432 .kr(1)
15433 .sr(1)
15434 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015435 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015436 .k(k)
15437 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15438 }
15439 }
15440 }
15441
15442 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_gt_8_strided_cn) {
15443 TEST_REQUIRES_X86_SSE;
15444 for (uint32_t n = 9; n < 16; n++) {
15445 for (size_t k = 1; k <= 20; k += 5) {
15446 GemmMicrokernelTester()
15447 .mr(4)
15448 .nr(8)
15449 .kr(1)
15450 .sr(1)
15451 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015452 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015453 .k(k)
15454 .cn_stride(11)
15455 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15456 }
15457 }
15458 }
15459
15460 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_gt_8_subtile) {
15461 TEST_REQUIRES_X86_SSE;
15462 for (uint32_t n = 9; n < 16; n++) {
15463 for (size_t k = 1; k <= 20; k += 5) {
15464 for (uint32_t m = 1; m <= 4; m++) {
15465 GemmMicrokernelTester()
15466 .mr(4)
15467 .nr(8)
15468 .kr(1)
15469 .sr(1)
15470 .m(m)
15471 .n(n)
15472 .k(k)
15473 .iterations(1)
15474 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15475 }
15476 }
15477 }
15478 }
15479
15480 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_div_8) {
15481 TEST_REQUIRES_X86_SSE;
15482 for (uint32_t n = 16; n <= 24; n += 8) {
15483 for (size_t k = 1; k <= 20; k += 5) {
15484 GemmMicrokernelTester()
15485 .mr(4)
15486 .nr(8)
15487 .kr(1)
15488 .sr(1)
15489 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015490 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015491 .k(k)
15492 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15493 }
15494 }
15495 }
15496
15497 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_div_8_strided_cn) {
15498 TEST_REQUIRES_X86_SSE;
15499 for (uint32_t n = 16; n <= 24; n += 8) {
15500 for (size_t k = 1; k <= 20; k += 5) {
15501 GemmMicrokernelTester()
15502 .mr(4)
15503 .nr(8)
15504 .kr(1)
15505 .sr(1)
15506 .m(4)
15507 .n(n)
15508 .k(k)
15509 .cn_stride(11)
15510 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15511 }
15512 }
15513 }
15514
15515 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_div_8_subtile) {
15516 TEST_REQUIRES_X86_SSE;
15517 for (uint32_t n = 16; n <= 24; n += 8) {
15518 for (size_t k = 1; k <= 20; k += 5) {
15519 for (uint32_t m = 1; m <= 4; m++) {
15520 GemmMicrokernelTester()
15521 .mr(4)
15522 .nr(8)
15523 .kr(1)
15524 .sr(1)
15525 .m(m)
15526 .n(n)
15527 .k(k)
15528 .iterations(1)
15529 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15530 }
15531 }
15532 }
15533 }
15534
15535 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, small_kernel) {
15536 TEST_REQUIRES_X86_SSE;
15537 for (size_t k = 1; k <= 20; k += 5) {
15538 GemmMicrokernelTester()
15539 .mr(4)
15540 .nr(8)
15541 .kr(1)
15542 .sr(1)
15543 .m(4)
15544 .n(8)
15545 .k(k)
15546 .ks(3)
15547 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15548 }
15549 }
15550
15551 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, small_kernel_subtile) {
15552 TEST_REQUIRES_X86_SSE;
15553 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015554 for (uint32_t n = 1; n <= 8; n++) {
15555 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015556 GemmMicrokernelTester()
15557 .mr(4)
15558 .nr(8)
15559 .kr(1)
15560 .sr(1)
15561 .m(m)
15562 .n(n)
15563 .k(k)
15564 .ks(3)
15565 .iterations(1)
15566 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15567 }
15568 }
15569 }
15570 }
15571
15572 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_gt_8_small_kernel) {
15573 TEST_REQUIRES_X86_SSE;
15574 for (uint32_t n = 9; n < 16; n++) {
15575 for (size_t k = 1; k <= 20; k += 5) {
15576 GemmMicrokernelTester()
15577 .mr(4)
15578 .nr(8)
15579 .kr(1)
15580 .sr(1)
15581 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015582 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015583 .k(k)
15584 .ks(3)
15585 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15586 }
15587 }
15588 }
15589
15590 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, n_div_8_small_kernel) {
15591 TEST_REQUIRES_X86_SSE;
15592 for (uint32_t n = 16; n <= 24; n += 8) {
15593 for (size_t k = 1; k <= 20; k += 5) {
15594 GemmMicrokernelTester()
15595 .mr(4)
15596 .nr(8)
15597 .kr(1)
15598 .sr(1)
15599 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015600 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015601 .k(k)
15602 .ks(3)
15603 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15604 }
15605 }
15606 }
15607
15608 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, strided_cm_subtile) {
15609 TEST_REQUIRES_X86_SSE;
15610 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015611 for (uint32_t n = 1; n <= 8; n++) {
15612 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015613 GemmMicrokernelTester()
15614 .mr(4)
15615 .nr(8)
15616 .kr(1)
15617 .sr(1)
15618 .m(m)
15619 .n(n)
15620 .k(k)
15621 .cm_stride(11)
15622 .iterations(1)
15623 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15624 }
15625 }
15626 }
15627 }
15628
15629 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, a_offset) {
15630 TEST_REQUIRES_X86_SSE;
15631 for (size_t k = 1; k <= 20; k += 5) {
15632 GemmMicrokernelTester()
15633 .mr(4)
15634 .nr(8)
15635 .kr(1)
15636 .sr(1)
15637 .m(4)
15638 .n(8)
15639 .k(k)
15640 .ks(3)
15641 .a_offset(83)
15642 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15643 }
15644 }
15645
15646 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, zero) {
15647 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015648 for (size_t k = 1; k <= 20; k += 5) {
15649 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015650 GemmMicrokernelTester()
15651 .mr(4)
15652 .nr(8)
15653 .kr(1)
15654 .sr(1)
15655 .m(4)
15656 .n(8)
15657 .k(k)
15658 .ks(3)
15659 .a_offset(83)
15660 .zero_index(mz)
15661 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15662 }
15663 }
15664 }
15665
15666 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, qmin) {
15667 TEST_REQUIRES_X86_SSE;
15668 GemmMicrokernelTester()
15669 .mr(4)
15670 .nr(8)
15671 .kr(1)
15672 .sr(1)
15673 .m(4)
15674 .n(8)
15675 .k(4)
15676 .qmin(128)
15677 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15678 }
15679
15680 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, qmax) {
15681 TEST_REQUIRES_X86_SSE;
15682 GemmMicrokernelTester()
15683 .mr(4)
15684 .nr(8)
15685 .kr(1)
15686 .sr(1)
15687 .m(4)
15688 .n(8)
15689 .k(4)
15690 .qmax(128)
15691 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15692 }
15693
15694 TEST(F32_IGEMM_MINMAX_4X8__SSE_DUP, strided_cm) {
15695 TEST_REQUIRES_X86_SSE;
15696 GemmMicrokernelTester()
15697 .mr(4)
15698 .nr(8)
15699 .kr(1)
15700 .sr(1)
15701 .m(4)
15702 .n(8)
15703 .k(4)
15704 .cm_stride(11)
15705 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, xnn_init_f32_minmax_sse_params);
15706 }
15707#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15708
15709
15710#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15711 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_eq_4) {
15712 TEST_REQUIRES_X86_SSE;
15713 GemmMicrokernelTester()
15714 .mr(1)
15715 .nr(8)
15716 .kr(1)
15717 .sr(4)
15718 .m(1)
15719 .n(8)
15720 .k(4)
15721 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15722 }
15723
15724 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, strided_cn) {
15725 TEST_REQUIRES_X86_SSE;
15726 GemmMicrokernelTester()
15727 .mr(1)
15728 .nr(8)
15729 .kr(1)
15730 .sr(4)
15731 .m(1)
15732 .n(8)
15733 .k(4)
15734 .cn_stride(11)
15735 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15736 }
15737
15738 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_eq_4_subtile) {
15739 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015740 for (uint32_t n = 1; n <= 8; n++) {
15741 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015742 GemmMicrokernelTester()
15743 .mr(1)
15744 .nr(8)
15745 .kr(1)
15746 .sr(4)
15747 .m(m)
15748 .n(n)
15749 .k(4)
15750 .iterations(1)
15751 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15752 }
15753 }
15754 }
15755
15756 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_eq_4_subtile_m) {
15757 TEST_REQUIRES_X86_SSE;
15758 for (uint32_t m = 1; m <= 1; m++) {
15759 GemmMicrokernelTester()
15760 .mr(1)
15761 .nr(8)
15762 .kr(1)
15763 .sr(4)
15764 .m(m)
15765 .n(8)
15766 .k(4)
15767 .iterations(1)
15768 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15769 }
15770 }
15771
15772 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_eq_4_subtile_n) {
15773 TEST_REQUIRES_X86_SSE;
15774 for (uint32_t n = 1; n <= 8; n++) {
15775 GemmMicrokernelTester()
15776 .mr(1)
15777 .nr(8)
15778 .kr(1)
15779 .sr(4)
15780 .m(1)
15781 .n(n)
15782 .k(4)
15783 .iterations(1)
15784 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15785 }
15786 }
15787
15788 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_lt_4) {
15789 TEST_REQUIRES_X86_SSE;
15790 for (size_t k = 1; k < 4; k++) {
15791 GemmMicrokernelTester()
15792 .mr(1)
15793 .nr(8)
15794 .kr(1)
15795 .sr(4)
15796 .m(1)
15797 .n(8)
15798 .k(k)
15799 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15800 }
15801 }
15802
15803 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_lt_4_subtile) {
15804 TEST_REQUIRES_X86_SSE;
15805 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015806 for (uint32_t n = 1; n <= 8; n++) {
15807 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015808 GemmMicrokernelTester()
15809 .mr(1)
15810 .nr(8)
15811 .kr(1)
15812 .sr(4)
15813 .m(m)
15814 .n(n)
15815 .k(k)
15816 .iterations(1)
15817 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15818 }
15819 }
15820 }
15821 }
15822
15823 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_gt_4) {
15824 TEST_REQUIRES_X86_SSE;
15825 for (size_t k = 5; k < 8; k++) {
15826 GemmMicrokernelTester()
15827 .mr(1)
15828 .nr(8)
15829 .kr(1)
15830 .sr(4)
15831 .m(1)
15832 .n(8)
15833 .k(k)
15834 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15835 }
15836 }
15837
15838 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_gt_4_subtile) {
15839 TEST_REQUIRES_X86_SSE;
15840 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015841 for (uint32_t n = 1; n <= 8; n++) {
15842 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015843 GemmMicrokernelTester()
15844 .mr(1)
15845 .nr(8)
15846 .kr(1)
15847 .sr(4)
15848 .m(m)
15849 .n(n)
15850 .k(k)
15851 .iterations(1)
15852 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15853 }
15854 }
15855 }
15856 }
15857
15858 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_div_4) {
15859 TEST_REQUIRES_X86_SSE;
15860 for (size_t k = 8; k <= 40; k += 4) {
15861 GemmMicrokernelTester()
15862 .mr(1)
15863 .nr(8)
15864 .kr(1)
15865 .sr(4)
15866 .m(1)
15867 .n(8)
15868 .k(k)
15869 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15870 }
15871 }
15872
15873 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, k_div_4_subtile) {
15874 TEST_REQUIRES_X86_SSE;
15875 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015876 for (uint32_t n = 1; n <= 8; n++) {
15877 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015878 GemmMicrokernelTester()
15879 .mr(1)
15880 .nr(8)
15881 .kr(1)
15882 .sr(4)
15883 .m(m)
15884 .n(n)
15885 .k(k)
15886 .iterations(1)
15887 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15888 }
15889 }
15890 }
15891 }
15892
15893 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_gt_8) {
15894 TEST_REQUIRES_X86_SSE;
15895 for (uint32_t n = 9; n < 16; n++) {
15896 for (size_t k = 1; k <= 20; k += 5) {
15897 GemmMicrokernelTester()
15898 .mr(1)
15899 .nr(8)
15900 .kr(1)
15901 .sr(4)
15902 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015903 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015904 .k(k)
15905 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15906 }
15907 }
15908 }
15909
15910 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_gt_8_strided_cn) {
15911 TEST_REQUIRES_X86_SSE;
15912 for (uint32_t n = 9; n < 16; n++) {
15913 for (size_t k = 1; k <= 20; k += 5) {
15914 GemmMicrokernelTester()
15915 .mr(1)
15916 .nr(8)
15917 .kr(1)
15918 .sr(4)
15919 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015920 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015921 .k(k)
15922 .cn_stride(11)
15923 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15924 }
15925 }
15926 }
15927
15928 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_gt_8_subtile) {
15929 TEST_REQUIRES_X86_SSE;
15930 for (uint32_t n = 9; n < 16; n++) {
15931 for (size_t k = 1; k <= 20; k += 5) {
15932 for (uint32_t m = 1; m <= 1; m++) {
15933 GemmMicrokernelTester()
15934 .mr(1)
15935 .nr(8)
15936 .kr(1)
15937 .sr(4)
15938 .m(m)
15939 .n(n)
15940 .k(k)
15941 .iterations(1)
15942 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15943 }
15944 }
15945 }
15946 }
15947
15948 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_div_8) {
15949 TEST_REQUIRES_X86_SSE;
15950 for (uint32_t n = 16; n <= 24; n += 8) {
15951 for (size_t k = 1; k <= 20; k += 5) {
15952 GemmMicrokernelTester()
15953 .mr(1)
15954 .nr(8)
15955 .kr(1)
15956 .sr(4)
15957 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015958 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015959 .k(k)
15960 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15961 }
15962 }
15963 }
15964
15965 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_div_8_strided_cn) {
15966 TEST_REQUIRES_X86_SSE;
15967 for (uint32_t n = 16; n <= 24; n += 8) {
15968 for (size_t k = 1; k <= 20; k += 5) {
15969 GemmMicrokernelTester()
15970 .mr(1)
15971 .nr(8)
15972 .kr(1)
15973 .sr(4)
15974 .m(1)
15975 .n(n)
15976 .k(k)
15977 .cn_stride(11)
15978 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15979 }
15980 }
15981 }
15982
15983 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_div_8_subtile) {
15984 TEST_REQUIRES_X86_SSE;
15985 for (uint32_t n = 16; n <= 24; n += 8) {
15986 for (size_t k = 1; k <= 20; k += 5) {
15987 for (uint32_t m = 1; m <= 1; m++) {
15988 GemmMicrokernelTester()
15989 .mr(1)
15990 .nr(8)
15991 .kr(1)
15992 .sr(4)
15993 .m(m)
15994 .n(n)
15995 .k(k)
15996 .iterations(1)
15997 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
15998 }
15999 }
16000 }
16001 }
16002
16003 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, small_kernel) {
16004 TEST_REQUIRES_X86_SSE;
16005 for (size_t k = 1; k <= 20; k += 5) {
16006 GemmMicrokernelTester()
16007 .mr(1)
16008 .nr(8)
16009 .kr(1)
16010 .sr(4)
16011 .m(1)
16012 .n(8)
16013 .k(k)
16014 .ks(3)
16015 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
16016 }
16017 }
16018
16019 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, small_kernel_subtile) {
16020 TEST_REQUIRES_X86_SSE;
16021 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016022 for (uint32_t n = 1; n <= 8; n++) {
16023 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016024 GemmMicrokernelTester()
16025 .mr(1)
16026 .nr(8)
16027 .kr(1)
16028 .sr(4)
16029 .m(m)
16030 .n(n)
16031 .k(k)
16032 .ks(3)
16033 .iterations(1)
16034 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
16035 }
16036 }
16037 }
16038 }
16039
16040 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_gt_8_small_kernel) {
16041 TEST_REQUIRES_X86_SSE;
16042 for (uint32_t n = 9; n < 16; n++) {
16043 for (size_t k = 1; k <= 20; k += 5) {
16044 GemmMicrokernelTester()
16045 .mr(1)
16046 .nr(8)
16047 .kr(1)
16048 .sr(4)
16049 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016050 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016051 .k(k)
16052 .ks(3)
16053 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
16054 }
16055 }
16056 }
16057
16058 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, n_div_8_small_kernel) {
16059 TEST_REQUIRES_X86_SSE;
16060 for (uint32_t n = 16; n <= 24; n += 8) {
16061 for (size_t k = 1; k <= 20; k += 5) {
16062 GemmMicrokernelTester()
16063 .mr(1)
16064 .nr(8)
16065 .kr(1)
16066 .sr(4)
16067 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016068 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016069 .k(k)
16070 .ks(3)
16071 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
16072 }
16073 }
16074 }
16075
16076 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, strided_cm_subtile) {
16077 TEST_REQUIRES_X86_SSE;
16078 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016079 for (uint32_t n = 1; n <= 8; n++) {
16080 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016081 GemmMicrokernelTester()
16082 .mr(1)
16083 .nr(8)
16084 .kr(1)
16085 .sr(4)
16086 .m(m)
16087 .n(n)
16088 .k(k)
16089 .cm_stride(11)
16090 .iterations(1)
16091 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
16092 }
16093 }
16094 }
16095 }
16096
16097 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, a_offset) {
16098 TEST_REQUIRES_X86_SSE;
16099 for (size_t k = 1; k <= 20; k += 5) {
16100 GemmMicrokernelTester()
16101 .mr(1)
16102 .nr(8)
16103 .kr(1)
16104 .sr(4)
16105 .m(1)
16106 .n(8)
16107 .k(k)
16108 .ks(3)
16109 .a_offset(23)
16110 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
16111 }
16112 }
16113
16114 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, zero) {
16115 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016116 for (size_t k = 1; k <= 20; k += 5) {
16117 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016118 GemmMicrokernelTester()
16119 .mr(1)
16120 .nr(8)
16121 .kr(1)
16122 .sr(4)
16123 .m(1)
16124 .n(8)
16125 .k(k)
16126 .ks(3)
16127 .a_offset(23)
16128 .zero_index(mz)
16129 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
16130 }
16131 }
16132 }
16133
16134 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, qmin) {
16135 TEST_REQUIRES_X86_SSE;
16136 GemmMicrokernelTester()
16137 .mr(1)
16138 .nr(8)
16139 .kr(1)
16140 .sr(4)
16141 .m(1)
16142 .n(8)
16143 .k(4)
16144 .qmin(128)
16145 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
16146 }
16147
16148 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, qmax) {
16149 TEST_REQUIRES_X86_SSE;
16150 GemmMicrokernelTester()
16151 .mr(1)
16152 .nr(8)
16153 .kr(1)
16154 .sr(4)
16155 .m(1)
16156 .n(8)
16157 .k(4)
16158 .qmax(128)
16159 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
16160 }
16161
16162 TEST(F32_IGEMM_MINMAX_1X8S4__SSE, strided_cm) {
16163 TEST_REQUIRES_X86_SSE;
16164 GemmMicrokernelTester()
16165 .mr(1)
16166 .nr(8)
16167 .kr(1)
16168 .sr(4)
16169 .m(1)
16170 .n(8)
16171 .k(4)
16172 .cm_stride(11)
16173 .Test(xnn_f32_igemm_minmax_ukernel_1x8s4__sse, xnn_init_f32_minmax_sse_params);
16174 }
16175#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16176
16177
16178#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16179 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_eq_4) {
16180 TEST_REQUIRES_X86_SSE;
16181 GemmMicrokernelTester()
16182 .mr(4)
16183 .nr(8)
16184 .kr(1)
16185 .sr(4)
16186 .m(4)
16187 .n(8)
16188 .k(4)
16189 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16190 }
16191
16192 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, strided_cn) {
16193 TEST_REQUIRES_X86_SSE;
16194 GemmMicrokernelTester()
16195 .mr(4)
16196 .nr(8)
16197 .kr(1)
16198 .sr(4)
16199 .m(4)
16200 .n(8)
16201 .k(4)
16202 .cn_stride(11)
16203 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16204 }
16205
16206 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_eq_4_subtile) {
16207 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016208 for (uint32_t n = 1; n <= 8; n++) {
16209 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016210 GemmMicrokernelTester()
16211 .mr(4)
16212 .nr(8)
16213 .kr(1)
16214 .sr(4)
16215 .m(m)
16216 .n(n)
16217 .k(4)
16218 .iterations(1)
16219 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16220 }
16221 }
16222 }
16223
16224 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_eq_4_subtile_m) {
16225 TEST_REQUIRES_X86_SSE;
16226 for (uint32_t m = 1; m <= 4; m++) {
16227 GemmMicrokernelTester()
16228 .mr(4)
16229 .nr(8)
16230 .kr(1)
16231 .sr(4)
16232 .m(m)
16233 .n(8)
16234 .k(4)
16235 .iterations(1)
16236 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16237 }
16238 }
16239
16240 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_eq_4_subtile_n) {
16241 TEST_REQUIRES_X86_SSE;
16242 for (uint32_t n = 1; n <= 8; n++) {
16243 GemmMicrokernelTester()
16244 .mr(4)
16245 .nr(8)
16246 .kr(1)
16247 .sr(4)
16248 .m(4)
16249 .n(n)
16250 .k(4)
16251 .iterations(1)
16252 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16253 }
16254 }
16255
16256 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_lt_4) {
16257 TEST_REQUIRES_X86_SSE;
16258 for (size_t k = 1; k < 4; k++) {
16259 GemmMicrokernelTester()
16260 .mr(4)
16261 .nr(8)
16262 .kr(1)
16263 .sr(4)
16264 .m(4)
16265 .n(8)
16266 .k(k)
16267 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16268 }
16269 }
16270
16271 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_lt_4_subtile) {
16272 TEST_REQUIRES_X86_SSE;
16273 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016274 for (uint32_t n = 1; n <= 8; n++) {
16275 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016276 GemmMicrokernelTester()
16277 .mr(4)
16278 .nr(8)
16279 .kr(1)
16280 .sr(4)
16281 .m(m)
16282 .n(n)
16283 .k(k)
16284 .iterations(1)
16285 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16286 }
16287 }
16288 }
16289 }
16290
16291 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_gt_4) {
16292 TEST_REQUIRES_X86_SSE;
16293 for (size_t k = 5; k < 8; k++) {
16294 GemmMicrokernelTester()
16295 .mr(4)
16296 .nr(8)
16297 .kr(1)
16298 .sr(4)
16299 .m(4)
16300 .n(8)
16301 .k(k)
16302 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16303 }
16304 }
16305
16306 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_gt_4_subtile) {
16307 TEST_REQUIRES_X86_SSE;
16308 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016309 for (uint32_t n = 1; n <= 8; n++) {
16310 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016311 GemmMicrokernelTester()
16312 .mr(4)
16313 .nr(8)
16314 .kr(1)
16315 .sr(4)
16316 .m(m)
16317 .n(n)
16318 .k(k)
16319 .iterations(1)
16320 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16321 }
16322 }
16323 }
16324 }
16325
16326 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_div_4) {
16327 TEST_REQUIRES_X86_SSE;
16328 for (size_t k = 8; k <= 40; k += 4) {
16329 GemmMicrokernelTester()
16330 .mr(4)
16331 .nr(8)
16332 .kr(1)
16333 .sr(4)
16334 .m(4)
16335 .n(8)
16336 .k(k)
16337 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16338 }
16339 }
16340
16341 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, k_div_4_subtile) {
16342 TEST_REQUIRES_X86_SSE;
16343 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016344 for (uint32_t n = 1; n <= 8; n++) {
16345 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016346 GemmMicrokernelTester()
16347 .mr(4)
16348 .nr(8)
16349 .kr(1)
16350 .sr(4)
16351 .m(m)
16352 .n(n)
16353 .k(k)
16354 .iterations(1)
16355 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16356 }
16357 }
16358 }
16359 }
16360
16361 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_gt_8) {
16362 TEST_REQUIRES_X86_SSE;
16363 for (uint32_t n = 9; n < 16; n++) {
16364 for (size_t k = 1; k <= 20; k += 5) {
16365 GemmMicrokernelTester()
16366 .mr(4)
16367 .nr(8)
16368 .kr(1)
16369 .sr(4)
16370 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016371 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016372 .k(k)
16373 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16374 }
16375 }
16376 }
16377
16378 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_gt_8_strided_cn) {
16379 TEST_REQUIRES_X86_SSE;
16380 for (uint32_t n = 9; n < 16; n++) {
16381 for (size_t k = 1; k <= 20; k += 5) {
16382 GemmMicrokernelTester()
16383 .mr(4)
16384 .nr(8)
16385 .kr(1)
16386 .sr(4)
16387 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016388 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016389 .k(k)
16390 .cn_stride(11)
16391 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16392 }
16393 }
16394 }
16395
16396 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_gt_8_subtile) {
16397 TEST_REQUIRES_X86_SSE;
16398 for (uint32_t n = 9; n < 16; n++) {
16399 for (size_t k = 1; k <= 20; k += 5) {
16400 for (uint32_t m = 1; m <= 4; m++) {
16401 GemmMicrokernelTester()
16402 .mr(4)
16403 .nr(8)
16404 .kr(1)
16405 .sr(4)
16406 .m(m)
16407 .n(n)
16408 .k(k)
16409 .iterations(1)
16410 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16411 }
16412 }
16413 }
16414 }
16415
16416 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_div_8) {
16417 TEST_REQUIRES_X86_SSE;
16418 for (uint32_t n = 16; n <= 24; n += 8) {
16419 for (size_t k = 1; k <= 20; k += 5) {
16420 GemmMicrokernelTester()
16421 .mr(4)
16422 .nr(8)
16423 .kr(1)
16424 .sr(4)
16425 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016426 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016427 .k(k)
16428 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16429 }
16430 }
16431 }
16432
16433 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_div_8_strided_cn) {
16434 TEST_REQUIRES_X86_SSE;
16435 for (uint32_t n = 16; n <= 24; n += 8) {
16436 for (size_t k = 1; k <= 20; k += 5) {
16437 GemmMicrokernelTester()
16438 .mr(4)
16439 .nr(8)
16440 .kr(1)
16441 .sr(4)
16442 .m(4)
16443 .n(n)
16444 .k(k)
16445 .cn_stride(11)
16446 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16447 }
16448 }
16449 }
16450
16451 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_div_8_subtile) {
16452 TEST_REQUIRES_X86_SSE;
16453 for (uint32_t n = 16; n <= 24; n += 8) {
16454 for (size_t k = 1; k <= 20; k += 5) {
16455 for (uint32_t m = 1; m <= 4; m++) {
16456 GemmMicrokernelTester()
16457 .mr(4)
16458 .nr(8)
16459 .kr(1)
16460 .sr(4)
16461 .m(m)
16462 .n(n)
16463 .k(k)
16464 .iterations(1)
16465 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16466 }
16467 }
16468 }
16469 }
16470
16471 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, small_kernel) {
16472 TEST_REQUIRES_X86_SSE;
16473 for (size_t k = 1; k <= 20; k += 5) {
16474 GemmMicrokernelTester()
16475 .mr(4)
16476 .nr(8)
16477 .kr(1)
16478 .sr(4)
16479 .m(4)
16480 .n(8)
16481 .k(k)
16482 .ks(3)
16483 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16484 }
16485 }
16486
16487 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, small_kernel_subtile) {
16488 TEST_REQUIRES_X86_SSE;
16489 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016490 for (uint32_t n = 1; n <= 8; n++) {
16491 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016492 GemmMicrokernelTester()
16493 .mr(4)
16494 .nr(8)
16495 .kr(1)
16496 .sr(4)
16497 .m(m)
16498 .n(n)
16499 .k(k)
16500 .ks(3)
16501 .iterations(1)
16502 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16503 }
16504 }
16505 }
16506 }
16507
16508 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_gt_8_small_kernel) {
16509 TEST_REQUIRES_X86_SSE;
16510 for (uint32_t n = 9; n < 16; n++) {
16511 for (size_t k = 1; k <= 20; k += 5) {
16512 GemmMicrokernelTester()
16513 .mr(4)
16514 .nr(8)
16515 .kr(1)
16516 .sr(4)
16517 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016518 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016519 .k(k)
16520 .ks(3)
16521 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16522 }
16523 }
16524 }
16525
16526 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, n_div_8_small_kernel) {
16527 TEST_REQUIRES_X86_SSE;
16528 for (uint32_t n = 16; n <= 24; n += 8) {
16529 for (size_t k = 1; k <= 20; k += 5) {
16530 GemmMicrokernelTester()
16531 .mr(4)
16532 .nr(8)
16533 .kr(1)
16534 .sr(4)
16535 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016536 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016537 .k(k)
16538 .ks(3)
16539 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16540 }
16541 }
16542 }
16543
16544 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, strided_cm_subtile) {
16545 TEST_REQUIRES_X86_SSE;
16546 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016547 for (uint32_t n = 1; n <= 8; n++) {
16548 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016549 GemmMicrokernelTester()
16550 .mr(4)
16551 .nr(8)
16552 .kr(1)
16553 .sr(4)
16554 .m(m)
16555 .n(n)
16556 .k(k)
16557 .cm_stride(11)
16558 .iterations(1)
16559 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16560 }
16561 }
16562 }
16563 }
16564
16565 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, a_offset) {
16566 TEST_REQUIRES_X86_SSE;
16567 for (size_t k = 1; k <= 20; k += 5) {
16568 GemmMicrokernelTester()
16569 .mr(4)
16570 .nr(8)
16571 .kr(1)
16572 .sr(4)
16573 .m(4)
16574 .n(8)
16575 .k(k)
16576 .ks(3)
16577 .a_offset(83)
16578 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16579 }
16580 }
16581
16582 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, zero) {
16583 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016584 for (size_t k = 1; k <= 20; k += 5) {
16585 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016586 GemmMicrokernelTester()
16587 .mr(4)
16588 .nr(8)
16589 .kr(1)
16590 .sr(4)
16591 .m(4)
16592 .n(8)
16593 .k(k)
16594 .ks(3)
16595 .a_offset(83)
16596 .zero_index(mz)
16597 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16598 }
16599 }
16600 }
16601
16602 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, qmin) {
16603 TEST_REQUIRES_X86_SSE;
16604 GemmMicrokernelTester()
16605 .mr(4)
16606 .nr(8)
16607 .kr(1)
16608 .sr(4)
16609 .m(4)
16610 .n(8)
16611 .k(4)
16612 .qmin(128)
16613 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16614 }
16615
16616 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, qmax) {
16617 TEST_REQUIRES_X86_SSE;
16618 GemmMicrokernelTester()
16619 .mr(4)
16620 .nr(8)
16621 .kr(1)
16622 .sr(4)
16623 .m(4)
16624 .n(8)
16625 .k(4)
16626 .qmax(128)
16627 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16628 }
16629
16630 TEST(F32_IGEMM_MINMAX_4X8S4__SSE, strided_cm) {
16631 TEST_REQUIRES_X86_SSE;
16632 GemmMicrokernelTester()
16633 .mr(4)
16634 .nr(8)
16635 .kr(1)
16636 .sr(4)
16637 .m(4)
16638 .n(8)
16639 .k(4)
16640 .cm_stride(11)
16641 .Test(xnn_f32_igemm_minmax_ukernel_4x8s4__sse, xnn_init_f32_minmax_sse_params);
16642 }
16643#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16644
16645
16646#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16647 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_eq_4) {
16648 TEST_REQUIRES_X86_SSE;
16649 GemmMicrokernelTester()
16650 .mr(5)
16651 .nr(8)
16652 .kr(1)
16653 .sr(4)
16654 .m(5)
16655 .n(8)
16656 .k(4)
16657 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16658 }
16659
16660 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, strided_cn) {
16661 TEST_REQUIRES_X86_SSE;
16662 GemmMicrokernelTester()
16663 .mr(5)
16664 .nr(8)
16665 .kr(1)
16666 .sr(4)
16667 .m(5)
16668 .n(8)
16669 .k(4)
16670 .cn_stride(11)
16671 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16672 }
16673
16674 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_eq_4_subtile) {
16675 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016676 for (uint32_t n = 1; n <= 8; n++) {
16677 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016678 GemmMicrokernelTester()
16679 .mr(5)
16680 .nr(8)
16681 .kr(1)
16682 .sr(4)
16683 .m(m)
16684 .n(n)
16685 .k(4)
16686 .iterations(1)
16687 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16688 }
16689 }
16690 }
16691
16692 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_eq_4_subtile_m) {
16693 TEST_REQUIRES_X86_SSE;
16694 for (uint32_t m = 1; m <= 5; m++) {
16695 GemmMicrokernelTester()
16696 .mr(5)
16697 .nr(8)
16698 .kr(1)
16699 .sr(4)
16700 .m(m)
16701 .n(8)
16702 .k(4)
16703 .iterations(1)
16704 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16705 }
16706 }
16707
16708 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_eq_4_subtile_n) {
16709 TEST_REQUIRES_X86_SSE;
16710 for (uint32_t n = 1; n <= 8; n++) {
16711 GemmMicrokernelTester()
16712 .mr(5)
16713 .nr(8)
16714 .kr(1)
16715 .sr(4)
16716 .m(5)
16717 .n(n)
16718 .k(4)
16719 .iterations(1)
16720 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16721 }
16722 }
16723
16724 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_lt_4) {
16725 TEST_REQUIRES_X86_SSE;
16726 for (size_t k = 1; k < 4; k++) {
16727 GemmMicrokernelTester()
16728 .mr(5)
16729 .nr(8)
16730 .kr(1)
16731 .sr(4)
16732 .m(5)
16733 .n(8)
16734 .k(k)
16735 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16736 }
16737 }
16738
16739 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_lt_4_subtile) {
16740 TEST_REQUIRES_X86_SSE;
16741 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016742 for (uint32_t n = 1; n <= 8; n++) {
16743 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016744 GemmMicrokernelTester()
16745 .mr(5)
16746 .nr(8)
16747 .kr(1)
16748 .sr(4)
16749 .m(m)
16750 .n(n)
16751 .k(k)
16752 .iterations(1)
16753 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16754 }
16755 }
16756 }
16757 }
16758
16759 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_gt_4) {
16760 TEST_REQUIRES_X86_SSE;
16761 for (size_t k = 5; k < 8; k++) {
16762 GemmMicrokernelTester()
16763 .mr(5)
16764 .nr(8)
16765 .kr(1)
16766 .sr(4)
16767 .m(5)
16768 .n(8)
16769 .k(k)
16770 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16771 }
16772 }
16773
16774 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_gt_4_subtile) {
16775 TEST_REQUIRES_X86_SSE;
16776 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016777 for (uint32_t n = 1; n <= 8; n++) {
16778 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016779 GemmMicrokernelTester()
16780 .mr(5)
16781 .nr(8)
16782 .kr(1)
16783 .sr(4)
16784 .m(m)
16785 .n(n)
16786 .k(k)
16787 .iterations(1)
16788 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16789 }
16790 }
16791 }
16792 }
16793
16794 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_div_4) {
16795 TEST_REQUIRES_X86_SSE;
16796 for (size_t k = 8; k <= 40; k += 4) {
16797 GemmMicrokernelTester()
16798 .mr(5)
16799 .nr(8)
16800 .kr(1)
16801 .sr(4)
16802 .m(5)
16803 .n(8)
16804 .k(k)
16805 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16806 }
16807 }
16808
16809 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, k_div_4_subtile) {
16810 TEST_REQUIRES_X86_SSE;
16811 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016812 for (uint32_t n = 1; n <= 8; n++) {
16813 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016814 GemmMicrokernelTester()
16815 .mr(5)
16816 .nr(8)
16817 .kr(1)
16818 .sr(4)
16819 .m(m)
16820 .n(n)
16821 .k(k)
16822 .iterations(1)
16823 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16824 }
16825 }
16826 }
16827 }
16828
16829 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_gt_8) {
16830 TEST_REQUIRES_X86_SSE;
16831 for (uint32_t n = 9; n < 16; n++) {
16832 for (size_t k = 1; k <= 20; k += 5) {
16833 GemmMicrokernelTester()
16834 .mr(5)
16835 .nr(8)
16836 .kr(1)
16837 .sr(4)
16838 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016839 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016840 .k(k)
16841 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16842 }
16843 }
16844 }
16845
16846 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_gt_8_strided_cn) {
16847 TEST_REQUIRES_X86_SSE;
16848 for (uint32_t n = 9; n < 16; n++) {
16849 for (size_t k = 1; k <= 20; k += 5) {
16850 GemmMicrokernelTester()
16851 .mr(5)
16852 .nr(8)
16853 .kr(1)
16854 .sr(4)
16855 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016856 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016857 .k(k)
16858 .cn_stride(11)
16859 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16860 }
16861 }
16862 }
16863
16864 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_gt_8_subtile) {
16865 TEST_REQUIRES_X86_SSE;
16866 for (uint32_t n = 9; n < 16; n++) {
16867 for (size_t k = 1; k <= 20; k += 5) {
16868 for (uint32_t m = 1; m <= 5; m++) {
16869 GemmMicrokernelTester()
16870 .mr(5)
16871 .nr(8)
16872 .kr(1)
16873 .sr(4)
16874 .m(m)
16875 .n(n)
16876 .k(k)
16877 .iterations(1)
16878 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16879 }
16880 }
16881 }
16882 }
16883
16884 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_div_8) {
16885 TEST_REQUIRES_X86_SSE;
16886 for (uint32_t n = 16; n <= 24; n += 8) {
16887 for (size_t k = 1; k <= 20; k += 5) {
16888 GemmMicrokernelTester()
16889 .mr(5)
16890 .nr(8)
16891 .kr(1)
16892 .sr(4)
16893 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016894 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016895 .k(k)
16896 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16897 }
16898 }
16899 }
16900
16901 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_div_8_strided_cn) {
16902 TEST_REQUIRES_X86_SSE;
16903 for (uint32_t n = 16; n <= 24; n += 8) {
16904 for (size_t k = 1; k <= 20; k += 5) {
16905 GemmMicrokernelTester()
16906 .mr(5)
16907 .nr(8)
16908 .kr(1)
16909 .sr(4)
16910 .m(5)
16911 .n(n)
16912 .k(k)
16913 .cn_stride(11)
16914 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16915 }
16916 }
16917 }
16918
16919 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_div_8_subtile) {
16920 TEST_REQUIRES_X86_SSE;
16921 for (uint32_t n = 16; n <= 24; n += 8) {
16922 for (size_t k = 1; k <= 20; k += 5) {
16923 for (uint32_t m = 1; m <= 5; m++) {
16924 GemmMicrokernelTester()
16925 .mr(5)
16926 .nr(8)
16927 .kr(1)
16928 .sr(4)
16929 .m(m)
16930 .n(n)
16931 .k(k)
16932 .iterations(1)
16933 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16934 }
16935 }
16936 }
16937 }
16938
16939 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, small_kernel) {
16940 TEST_REQUIRES_X86_SSE;
16941 for (size_t k = 1; k <= 20; k += 5) {
16942 GemmMicrokernelTester()
16943 .mr(5)
16944 .nr(8)
16945 .kr(1)
16946 .sr(4)
16947 .m(5)
16948 .n(8)
16949 .k(k)
16950 .ks(3)
16951 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16952 }
16953 }
16954
16955 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, small_kernel_subtile) {
16956 TEST_REQUIRES_X86_SSE;
16957 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016958 for (uint32_t n = 1; n <= 8; n++) {
16959 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016960 GemmMicrokernelTester()
16961 .mr(5)
16962 .nr(8)
16963 .kr(1)
16964 .sr(4)
16965 .m(m)
16966 .n(n)
16967 .k(k)
16968 .ks(3)
16969 .iterations(1)
16970 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16971 }
16972 }
16973 }
16974 }
16975
16976 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_gt_8_small_kernel) {
16977 TEST_REQUIRES_X86_SSE;
16978 for (uint32_t n = 9; n < 16; n++) {
16979 for (size_t k = 1; k <= 20; k += 5) {
16980 GemmMicrokernelTester()
16981 .mr(5)
16982 .nr(8)
16983 .kr(1)
16984 .sr(4)
16985 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016986 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016987 .k(k)
16988 .ks(3)
16989 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
16990 }
16991 }
16992 }
16993
16994 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, n_div_8_small_kernel) {
16995 TEST_REQUIRES_X86_SSE;
16996 for (uint32_t n = 16; n <= 24; n += 8) {
16997 for (size_t k = 1; k <= 20; k += 5) {
16998 GemmMicrokernelTester()
16999 .mr(5)
17000 .nr(8)
17001 .kr(1)
17002 .sr(4)
17003 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017004 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017005 .k(k)
17006 .ks(3)
17007 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
17008 }
17009 }
17010 }
17011
17012 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, strided_cm_subtile) {
17013 TEST_REQUIRES_X86_SSE;
17014 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017015 for (uint32_t n = 1; n <= 8; n++) {
17016 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017017 GemmMicrokernelTester()
17018 .mr(5)
17019 .nr(8)
17020 .kr(1)
17021 .sr(4)
17022 .m(m)
17023 .n(n)
17024 .k(k)
17025 .cm_stride(11)
17026 .iterations(1)
17027 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
17028 }
17029 }
17030 }
17031 }
17032
17033 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, a_offset) {
17034 TEST_REQUIRES_X86_SSE;
17035 for (size_t k = 1; k <= 20; k += 5) {
17036 GemmMicrokernelTester()
17037 .mr(5)
17038 .nr(8)
17039 .kr(1)
17040 .sr(4)
17041 .m(5)
17042 .n(8)
17043 .k(k)
17044 .ks(3)
17045 .a_offset(103)
17046 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
17047 }
17048 }
17049
17050 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, zero) {
17051 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017052 for (size_t k = 1; k <= 20; k += 5) {
17053 for (uint32_t mz = 0; mz < 5; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017054 GemmMicrokernelTester()
17055 .mr(5)
17056 .nr(8)
17057 .kr(1)
17058 .sr(4)
17059 .m(5)
17060 .n(8)
17061 .k(k)
17062 .ks(3)
17063 .a_offset(103)
17064 .zero_index(mz)
17065 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
17066 }
17067 }
17068 }
17069
17070 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, qmin) {
17071 TEST_REQUIRES_X86_SSE;
17072 GemmMicrokernelTester()
17073 .mr(5)
17074 .nr(8)
17075 .kr(1)
17076 .sr(4)
17077 .m(5)
17078 .n(8)
17079 .k(4)
17080 .qmin(128)
17081 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
17082 }
17083
17084 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, qmax) {
17085 TEST_REQUIRES_X86_SSE;
17086 GemmMicrokernelTester()
17087 .mr(5)
17088 .nr(8)
17089 .kr(1)
17090 .sr(4)
17091 .m(5)
17092 .n(8)
17093 .k(4)
17094 .qmax(128)
17095 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
17096 }
17097
17098 TEST(F32_IGEMM_MINMAX_5X8S4__SSE, strided_cm) {
17099 TEST_REQUIRES_X86_SSE;
17100 GemmMicrokernelTester()
17101 .mr(5)
17102 .nr(8)
17103 .kr(1)
17104 .sr(4)
17105 .m(5)
17106 .n(8)
17107 .k(4)
17108 .cm_stride(11)
17109 .Test(xnn_f32_igemm_minmax_ukernel_5x8s4__sse, xnn_init_f32_minmax_sse_params);
17110 }
17111#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17112
17113
17114#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17115 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_eq_4) {
17116 TEST_REQUIRES_X86_SSE;
17117 GemmMicrokernelTester()
17118 .mr(4)
17119 .nr(2)
17120 .kr(4)
17121 .sr(1)
17122 .m(4)
17123 .n(2)
17124 .k(4)
17125 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17126 }
17127
17128 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, strided_cn) {
17129 TEST_REQUIRES_X86_SSE;
17130 GemmMicrokernelTester()
17131 .mr(4)
17132 .nr(2)
17133 .kr(4)
17134 .sr(1)
17135 .m(4)
17136 .n(2)
17137 .k(4)
17138 .cn_stride(5)
17139 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17140 }
17141
17142 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_eq_4_subtile) {
17143 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017144 for (uint32_t n = 1; n <= 2; n++) {
17145 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017146 GemmMicrokernelTester()
17147 .mr(4)
17148 .nr(2)
17149 .kr(4)
17150 .sr(1)
17151 .m(m)
17152 .n(n)
17153 .k(4)
17154 .iterations(1)
17155 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17156 }
17157 }
17158 }
17159
17160 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_eq_4_subtile_m) {
17161 TEST_REQUIRES_X86_SSE;
17162 for (uint32_t m = 1; m <= 4; m++) {
17163 GemmMicrokernelTester()
17164 .mr(4)
17165 .nr(2)
17166 .kr(4)
17167 .sr(1)
17168 .m(m)
17169 .n(2)
17170 .k(4)
17171 .iterations(1)
17172 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17173 }
17174 }
17175
17176 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_eq_4_subtile_n) {
17177 TEST_REQUIRES_X86_SSE;
17178 for (uint32_t n = 1; n <= 2; n++) {
17179 GemmMicrokernelTester()
17180 .mr(4)
17181 .nr(2)
17182 .kr(4)
17183 .sr(1)
17184 .m(4)
17185 .n(n)
17186 .k(4)
17187 .iterations(1)
17188 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17189 }
17190 }
17191
17192 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_lt_4) {
17193 TEST_REQUIRES_X86_SSE;
17194 for (size_t k = 1; k < 4; k++) {
17195 GemmMicrokernelTester()
17196 .mr(4)
17197 .nr(2)
17198 .kr(4)
17199 .sr(1)
17200 .m(4)
17201 .n(2)
17202 .k(k)
17203 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17204 }
17205 }
17206
17207 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_lt_4_subtile) {
17208 TEST_REQUIRES_X86_SSE;
17209 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017210 for (uint32_t n = 1; n <= 2; n++) {
17211 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017212 GemmMicrokernelTester()
17213 .mr(4)
17214 .nr(2)
17215 .kr(4)
17216 .sr(1)
17217 .m(m)
17218 .n(n)
17219 .k(k)
17220 .iterations(1)
17221 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17222 }
17223 }
17224 }
17225 }
17226
17227 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_gt_4) {
17228 TEST_REQUIRES_X86_SSE;
17229 for (size_t k = 5; k < 8; k++) {
17230 GemmMicrokernelTester()
17231 .mr(4)
17232 .nr(2)
17233 .kr(4)
17234 .sr(1)
17235 .m(4)
17236 .n(2)
17237 .k(k)
17238 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17239 }
17240 }
17241
17242 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_gt_4_subtile) {
17243 TEST_REQUIRES_X86_SSE;
17244 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017245 for (uint32_t n = 1; n <= 2; n++) {
17246 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017247 GemmMicrokernelTester()
17248 .mr(4)
17249 .nr(2)
17250 .kr(4)
17251 .sr(1)
17252 .m(m)
17253 .n(n)
17254 .k(k)
17255 .iterations(1)
17256 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17257 }
17258 }
17259 }
17260 }
17261
17262 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_div_4) {
17263 TEST_REQUIRES_X86_SSE;
17264 for (size_t k = 8; k <= 40; k += 4) {
17265 GemmMicrokernelTester()
17266 .mr(4)
17267 .nr(2)
17268 .kr(4)
17269 .sr(1)
17270 .m(4)
17271 .n(2)
17272 .k(k)
17273 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17274 }
17275 }
17276
17277 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, k_div_4_subtile) {
17278 TEST_REQUIRES_X86_SSE;
17279 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017280 for (uint32_t n = 1; n <= 2; n++) {
17281 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017282 GemmMicrokernelTester()
17283 .mr(4)
17284 .nr(2)
17285 .kr(4)
17286 .sr(1)
17287 .m(m)
17288 .n(n)
17289 .k(k)
17290 .iterations(1)
17291 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17292 }
17293 }
17294 }
17295 }
17296
17297 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_gt_2) {
17298 TEST_REQUIRES_X86_SSE;
17299 for (uint32_t n = 3; n < 4; n++) {
17300 for (size_t k = 1; k <= 20; k += 5) {
17301 GemmMicrokernelTester()
17302 .mr(4)
17303 .nr(2)
17304 .kr(4)
17305 .sr(1)
17306 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017307 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017308 .k(k)
17309 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17310 }
17311 }
17312 }
17313
17314 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_gt_2_strided_cn) {
17315 TEST_REQUIRES_X86_SSE;
17316 for (uint32_t n = 3; n < 4; n++) {
17317 for (size_t k = 1; k <= 20; k += 5) {
17318 GemmMicrokernelTester()
17319 .mr(4)
17320 .nr(2)
17321 .kr(4)
17322 .sr(1)
17323 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017324 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017325 .k(k)
17326 .cn_stride(5)
17327 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17328 }
17329 }
17330 }
17331
17332 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_gt_2_subtile) {
17333 TEST_REQUIRES_X86_SSE;
17334 for (uint32_t n = 3; n < 4; n++) {
17335 for (size_t k = 1; k <= 20; k += 5) {
17336 for (uint32_t m = 1; m <= 4; m++) {
17337 GemmMicrokernelTester()
17338 .mr(4)
17339 .nr(2)
17340 .kr(4)
17341 .sr(1)
17342 .m(m)
17343 .n(n)
17344 .k(k)
17345 .iterations(1)
17346 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17347 }
17348 }
17349 }
17350 }
17351
17352 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_div_2) {
17353 TEST_REQUIRES_X86_SSE;
17354 for (uint32_t n = 4; n <= 6; n += 2) {
17355 for (size_t k = 1; k <= 20; k += 5) {
17356 GemmMicrokernelTester()
17357 .mr(4)
17358 .nr(2)
17359 .kr(4)
17360 .sr(1)
17361 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017362 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017363 .k(k)
17364 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17365 }
17366 }
17367 }
17368
17369 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_div_2_strided_cn) {
17370 TEST_REQUIRES_X86_SSE;
17371 for (uint32_t n = 4; n <= 6; n += 2) {
17372 for (size_t k = 1; k <= 20; k += 5) {
17373 GemmMicrokernelTester()
17374 .mr(4)
17375 .nr(2)
17376 .kr(4)
17377 .sr(1)
17378 .m(4)
17379 .n(n)
17380 .k(k)
17381 .cn_stride(5)
17382 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17383 }
17384 }
17385 }
17386
17387 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_div_2_subtile) {
17388 TEST_REQUIRES_X86_SSE;
17389 for (uint32_t n = 4; n <= 6; n += 2) {
17390 for (size_t k = 1; k <= 20; k += 5) {
17391 for (uint32_t m = 1; m <= 4; m++) {
17392 GemmMicrokernelTester()
17393 .mr(4)
17394 .nr(2)
17395 .kr(4)
17396 .sr(1)
17397 .m(m)
17398 .n(n)
17399 .k(k)
17400 .iterations(1)
17401 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17402 }
17403 }
17404 }
17405 }
17406
17407 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, small_kernel) {
17408 TEST_REQUIRES_X86_SSE;
17409 for (size_t k = 1; k <= 20; k += 5) {
17410 GemmMicrokernelTester()
17411 .mr(4)
17412 .nr(2)
17413 .kr(4)
17414 .sr(1)
17415 .m(4)
17416 .n(2)
17417 .k(k)
17418 .ks(3)
17419 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17420 }
17421 }
17422
17423 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, small_kernel_subtile) {
17424 TEST_REQUIRES_X86_SSE;
17425 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017426 for (uint32_t n = 1; n <= 2; n++) {
17427 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017428 GemmMicrokernelTester()
17429 .mr(4)
17430 .nr(2)
17431 .kr(4)
17432 .sr(1)
17433 .m(m)
17434 .n(n)
17435 .k(k)
17436 .ks(3)
17437 .iterations(1)
17438 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17439 }
17440 }
17441 }
17442 }
17443
17444 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_gt_2_small_kernel) {
17445 TEST_REQUIRES_X86_SSE;
17446 for (uint32_t n = 3; n < 4; n++) {
17447 for (size_t k = 1; k <= 20; k += 5) {
17448 GemmMicrokernelTester()
17449 .mr(4)
17450 .nr(2)
17451 .kr(4)
17452 .sr(1)
17453 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017454 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017455 .k(k)
17456 .ks(3)
17457 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17458 }
17459 }
17460 }
17461
17462 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, n_div_2_small_kernel) {
17463 TEST_REQUIRES_X86_SSE;
17464 for (uint32_t n = 4; n <= 6; n += 2) {
17465 for (size_t k = 1; k <= 20; k += 5) {
17466 GemmMicrokernelTester()
17467 .mr(4)
17468 .nr(2)
17469 .kr(4)
17470 .sr(1)
17471 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017472 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017473 .k(k)
17474 .ks(3)
17475 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17476 }
17477 }
17478 }
17479
17480 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, strided_cm_subtile) {
17481 TEST_REQUIRES_X86_SSE;
17482 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017483 for (uint32_t n = 1; n <= 2; n++) {
17484 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017485 GemmMicrokernelTester()
17486 .mr(4)
17487 .nr(2)
17488 .kr(4)
17489 .sr(1)
17490 .m(m)
17491 .n(n)
17492 .k(k)
17493 .cm_stride(5)
17494 .iterations(1)
17495 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17496 }
17497 }
17498 }
17499 }
17500
17501 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, a_offset) {
17502 TEST_REQUIRES_X86_SSE;
17503 for (size_t k = 1; k <= 20; k += 5) {
17504 GemmMicrokernelTester()
17505 .mr(4)
17506 .nr(2)
17507 .kr(4)
17508 .sr(1)
17509 .m(4)
17510 .n(2)
17511 .k(k)
17512 .ks(3)
17513 .a_offset(83)
17514 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17515 }
17516 }
17517
17518 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, zero) {
17519 TEST_REQUIRES_X86_SSE;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017520 for (size_t k = 1; k <= 20; k += 5) {
17521 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017522 GemmMicrokernelTester()
17523 .mr(4)
17524 .nr(2)
17525 .kr(4)
17526 .sr(1)
17527 .m(4)
17528 .n(2)
17529 .k(k)
17530 .ks(3)
17531 .a_offset(83)
17532 .zero_index(mz)
17533 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17534 }
17535 }
17536 }
17537
17538 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, qmin) {
17539 TEST_REQUIRES_X86_SSE;
17540 GemmMicrokernelTester()
17541 .mr(4)
17542 .nr(2)
17543 .kr(4)
17544 .sr(1)
17545 .m(4)
17546 .n(2)
17547 .k(4)
17548 .qmin(128)
17549 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17550 }
17551
17552 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, qmax) {
17553 TEST_REQUIRES_X86_SSE;
17554 GemmMicrokernelTester()
17555 .mr(4)
17556 .nr(2)
17557 .kr(4)
17558 .sr(1)
17559 .m(4)
17560 .n(2)
17561 .k(4)
17562 .qmax(128)
17563 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17564 }
17565
17566 TEST(F32_IGEMM_MINMAX_4X2C4__SSE, strided_cm) {
17567 TEST_REQUIRES_X86_SSE;
17568 GemmMicrokernelTester()
17569 .mr(4)
17570 .nr(2)
17571 .kr(4)
17572 .sr(1)
17573 .m(4)
17574 .n(2)
17575 .k(4)
17576 .cm_stride(5)
17577 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__sse, xnn_init_f32_minmax_sse_params);
17578 }
17579#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17580
17581
17582#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17583 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_eq_4) {
17584 TEST_REQUIRES_X86_SSE2;
17585 GemmMicrokernelTester()
17586 .mr(3)
17587 .nr(8)
17588 .kr(1)
17589 .sr(1)
17590 .m(3)
17591 .n(8)
17592 .k(4)
17593 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17594 }
17595
17596 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, strided_cn) {
17597 TEST_REQUIRES_X86_SSE2;
17598 GemmMicrokernelTester()
17599 .mr(3)
17600 .nr(8)
17601 .kr(1)
17602 .sr(1)
17603 .m(3)
17604 .n(8)
17605 .k(4)
17606 .cn_stride(11)
17607 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17608 }
17609
17610 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile) {
17611 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017612 for (uint32_t n = 1; n <= 8; n++) {
17613 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017614 GemmMicrokernelTester()
17615 .mr(3)
17616 .nr(8)
17617 .kr(1)
17618 .sr(1)
17619 .m(m)
17620 .n(n)
17621 .k(4)
17622 .iterations(1)
17623 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17624 }
17625 }
17626 }
17627
17628 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile_m) {
17629 TEST_REQUIRES_X86_SSE2;
17630 for (uint32_t m = 1; m <= 3; m++) {
17631 GemmMicrokernelTester()
17632 .mr(3)
17633 .nr(8)
17634 .kr(1)
17635 .sr(1)
17636 .m(m)
17637 .n(8)
17638 .k(4)
17639 .iterations(1)
17640 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17641 }
17642 }
17643
17644 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_eq_4_subtile_n) {
17645 TEST_REQUIRES_X86_SSE2;
17646 for (uint32_t n = 1; n <= 8; n++) {
17647 GemmMicrokernelTester()
17648 .mr(3)
17649 .nr(8)
17650 .kr(1)
17651 .sr(1)
17652 .m(3)
17653 .n(n)
17654 .k(4)
17655 .iterations(1)
17656 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17657 }
17658 }
17659
17660 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_lt_4) {
17661 TEST_REQUIRES_X86_SSE2;
17662 for (size_t k = 1; k < 4; k++) {
17663 GemmMicrokernelTester()
17664 .mr(3)
17665 .nr(8)
17666 .kr(1)
17667 .sr(1)
17668 .m(3)
17669 .n(8)
17670 .k(k)
17671 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17672 }
17673 }
17674
17675 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_lt_4_subtile) {
17676 TEST_REQUIRES_X86_SSE2;
17677 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017678 for (uint32_t n = 1; n <= 8; n++) {
17679 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017680 GemmMicrokernelTester()
17681 .mr(3)
17682 .nr(8)
17683 .kr(1)
17684 .sr(1)
17685 .m(m)
17686 .n(n)
17687 .k(k)
17688 .iterations(1)
17689 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17690 }
17691 }
17692 }
17693 }
17694
17695 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_gt_4) {
17696 TEST_REQUIRES_X86_SSE2;
17697 for (size_t k = 5; k < 8; k++) {
17698 GemmMicrokernelTester()
17699 .mr(3)
17700 .nr(8)
17701 .kr(1)
17702 .sr(1)
17703 .m(3)
17704 .n(8)
17705 .k(k)
17706 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17707 }
17708 }
17709
17710 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_gt_4_subtile) {
17711 TEST_REQUIRES_X86_SSE2;
17712 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017713 for (uint32_t n = 1; n <= 8; n++) {
17714 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017715 GemmMicrokernelTester()
17716 .mr(3)
17717 .nr(8)
17718 .kr(1)
17719 .sr(1)
17720 .m(m)
17721 .n(n)
17722 .k(k)
17723 .iterations(1)
17724 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17725 }
17726 }
17727 }
17728 }
17729
17730 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_div_4) {
17731 TEST_REQUIRES_X86_SSE2;
17732 for (size_t k = 8; k <= 40; k += 4) {
17733 GemmMicrokernelTester()
17734 .mr(3)
17735 .nr(8)
17736 .kr(1)
17737 .sr(1)
17738 .m(3)
17739 .n(8)
17740 .k(k)
17741 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17742 }
17743 }
17744
17745 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, k_div_4_subtile) {
17746 TEST_REQUIRES_X86_SSE2;
17747 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017748 for (uint32_t n = 1; n <= 8; n++) {
17749 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017750 GemmMicrokernelTester()
17751 .mr(3)
17752 .nr(8)
17753 .kr(1)
17754 .sr(1)
17755 .m(m)
17756 .n(n)
17757 .k(k)
17758 .iterations(1)
17759 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17760 }
17761 }
17762 }
17763 }
17764
17765 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_gt_8) {
17766 TEST_REQUIRES_X86_SSE2;
17767 for (uint32_t n = 9; n < 16; n++) {
17768 for (size_t k = 1; k <= 20; k += 5) {
17769 GemmMicrokernelTester()
17770 .mr(3)
17771 .nr(8)
17772 .kr(1)
17773 .sr(1)
17774 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017775 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017776 .k(k)
17777 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17778 }
17779 }
17780 }
17781
17782 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_gt_8_strided_cn) {
17783 TEST_REQUIRES_X86_SSE2;
17784 for (uint32_t n = 9; n < 16; n++) {
17785 for (size_t k = 1; k <= 20; k += 5) {
17786 GemmMicrokernelTester()
17787 .mr(3)
17788 .nr(8)
17789 .kr(1)
17790 .sr(1)
17791 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017792 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017793 .k(k)
17794 .cn_stride(11)
17795 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17796 }
17797 }
17798 }
17799
17800 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_gt_8_subtile) {
17801 TEST_REQUIRES_X86_SSE2;
17802 for (uint32_t n = 9; n < 16; n++) {
17803 for (size_t k = 1; k <= 20; k += 5) {
17804 for (uint32_t m = 1; m <= 3; m++) {
17805 GemmMicrokernelTester()
17806 .mr(3)
17807 .nr(8)
17808 .kr(1)
17809 .sr(1)
17810 .m(m)
17811 .n(n)
17812 .k(k)
17813 .iterations(1)
17814 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17815 }
17816 }
17817 }
17818 }
17819
17820 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_div_8) {
17821 TEST_REQUIRES_X86_SSE2;
17822 for (uint32_t n = 16; n <= 24; n += 8) {
17823 for (size_t k = 1; k <= 20; k += 5) {
17824 GemmMicrokernelTester()
17825 .mr(3)
17826 .nr(8)
17827 .kr(1)
17828 .sr(1)
17829 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017830 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017831 .k(k)
17832 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17833 }
17834 }
17835 }
17836
17837 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_div_8_strided_cn) {
17838 TEST_REQUIRES_X86_SSE2;
17839 for (uint32_t n = 16; n <= 24; n += 8) {
17840 for (size_t k = 1; k <= 20; k += 5) {
17841 GemmMicrokernelTester()
17842 .mr(3)
17843 .nr(8)
17844 .kr(1)
17845 .sr(1)
17846 .m(3)
17847 .n(n)
17848 .k(k)
17849 .cn_stride(11)
17850 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17851 }
17852 }
17853 }
17854
17855 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_div_8_subtile) {
17856 TEST_REQUIRES_X86_SSE2;
17857 for (uint32_t n = 16; n <= 24; n += 8) {
17858 for (size_t k = 1; k <= 20; k += 5) {
17859 for (uint32_t m = 1; m <= 3; m++) {
17860 GemmMicrokernelTester()
17861 .mr(3)
17862 .nr(8)
17863 .kr(1)
17864 .sr(1)
17865 .m(m)
17866 .n(n)
17867 .k(k)
17868 .iterations(1)
17869 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17870 }
17871 }
17872 }
17873 }
17874
17875 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, small_kernel) {
17876 TEST_REQUIRES_X86_SSE2;
17877 for (size_t k = 1; k <= 20; k += 5) {
17878 GemmMicrokernelTester()
17879 .mr(3)
17880 .nr(8)
17881 .kr(1)
17882 .sr(1)
17883 .m(3)
17884 .n(8)
17885 .k(k)
17886 .ks(3)
17887 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17888 }
17889 }
17890
17891 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, small_kernel_subtile) {
17892 TEST_REQUIRES_X86_SSE2;
17893 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017894 for (uint32_t n = 1; n <= 8; n++) {
17895 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017896 GemmMicrokernelTester()
17897 .mr(3)
17898 .nr(8)
17899 .kr(1)
17900 .sr(1)
17901 .m(m)
17902 .n(n)
17903 .k(k)
17904 .ks(3)
17905 .iterations(1)
17906 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17907 }
17908 }
17909 }
17910 }
17911
17912 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_gt_8_small_kernel) {
17913 TEST_REQUIRES_X86_SSE2;
17914 for (uint32_t n = 9; n < 16; n++) {
17915 for (size_t k = 1; k <= 20; k += 5) {
17916 GemmMicrokernelTester()
17917 .mr(3)
17918 .nr(8)
17919 .kr(1)
17920 .sr(1)
17921 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017922 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017923 .k(k)
17924 .ks(3)
17925 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17926 }
17927 }
17928 }
17929
17930 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, n_div_8_small_kernel) {
17931 TEST_REQUIRES_X86_SSE2;
17932 for (uint32_t n = 16; n <= 24; n += 8) {
17933 for (size_t k = 1; k <= 20; k += 5) {
17934 GemmMicrokernelTester()
17935 .mr(3)
17936 .nr(8)
17937 .kr(1)
17938 .sr(1)
17939 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017940 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017941 .k(k)
17942 .ks(3)
17943 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17944 }
17945 }
17946 }
17947
17948 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, strided_cm_subtile) {
17949 TEST_REQUIRES_X86_SSE2;
17950 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017951 for (uint32_t n = 1; n <= 8; n++) {
17952 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017953 GemmMicrokernelTester()
17954 .mr(3)
17955 .nr(8)
17956 .kr(1)
17957 .sr(1)
17958 .m(m)
17959 .n(n)
17960 .k(k)
17961 .cm_stride(11)
17962 .iterations(1)
17963 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17964 }
17965 }
17966 }
17967 }
17968
17969 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, a_offset) {
17970 TEST_REQUIRES_X86_SSE2;
17971 for (size_t k = 1; k <= 20; k += 5) {
17972 GemmMicrokernelTester()
17973 .mr(3)
17974 .nr(8)
17975 .kr(1)
17976 .sr(1)
17977 .m(3)
17978 .n(8)
17979 .k(k)
17980 .ks(3)
17981 .a_offset(67)
17982 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
17983 }
17984 }
17985
17986 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, zero) {
17987 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017988 for (size_t k = 1; k <= 20; k += 5) {
17989 for (uint32_t mz = 0; mz < 3; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017990 GemmMicrokernelTester()
17991 .mr(3)
17992 .nr(8)
17993 .kr(1)
17994 .sr(1)
17995 .m(3)
17996 .n(8)
17997 .k(k)
17998 .ks(3)
17999 .a_offset(67)
18000 .zero_index(mz)
18001 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18002 }
18003 }
18004 }
18005
18006 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, qmin) {
18007 TEST_REQUIRES_X86_SSE2;
18008 GemmMicrokernelTester()
18009 .mr(3)
18010 .nr(8)
18011 .kr(1)
18012 .sr(1)
18013 .m(3)
18014 .n(8)
18015 .k(4)
18016 .qmin(128)
18017 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18018 }
18019
18020 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, qmax) {
18021 TEST_REQUIRES_X86_SSE2;
18022 GemmMicrokernelTester()
18023 .mr(3)
18024 .nr(8)
18025 .kr(1)
18026 .sr(1)
18027 .m(3)
18028 .n(8)
18029 .k(4)
18030 .qmax(128)
18031 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18032 }
18033
18034 TEST(F32_IGEMM_MINMAX_3X8__SSE2_DUP, strided_cm) {
18035 TEST_REQUIRES_X86_SSE2;
18036 GemmMicrokernelTester()
18037 .mr(3)
18038 .nr(8)
18039 .kr(1)
18040 .sr(1)
18041 .m(3)
18042 .n(8)
18043 .k(4)
18044 .cm_stride(11)
18045 .Test(xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18046 }
18047#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18048
18049
18050#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18051 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_eq_4) {
18052 TEST_REQUIRES_X86_SSE2;
18053 GemmMicrokernelTester()
18054 .mr(4)
18055 .nr(8)
18056 .kr(1)
18057 .sr(1)
18058 .m(4)
18059 .n(8)
18060 .k(4)
18061 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18062 }
18063
18064 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, strided_cn) {
18065 TEST_REQUIRES_X86_SSE2;
18066 GemmMicrokernelTester()
18067 .mr(4)
18068 .nr(8)
18069 .kr(1)
18070 .sr(1)
18071 .m(4)
18072 .n(8)
18073 .k(4)
18074 .cn_stride(11)
18075 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18076 }
18077
18078 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile) {
18079 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018080 for (uint32_t n = 1; n <= 8; n++) {
18081 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018082 GemmMicrokernelTester()
18083 .mr(4)
18084 .nr(8)
18085 .kr(1)
18086 .sr(1)
18087 .m(m)
18088 .n(n)
18089 .k(4)
18090 .iterations(1)
18091 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18092 }
18093 }
18094 }
18095
18096 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile_m) {
18097 TEST_REQUIRES_X86_SSE2;
18098 for (uint32_t m = 1; m <= 4; m++) {
18099 GemmMicrokernelTester()
18100 .mr(4)
18101 .nr(8)
18102 .kr(1)
18103 .sr(1)
18104 .m(m)
18105 .n(8)
18106 .k(4)
18107 .iterations(1)
18108 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18109 }
18110 }
18111
18112 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_eq_4_subtile_n) {
18113 TEST_REQUIRES_X86_SSE2;
18114 for (uint32_t n = 1; n <= 8; n++) {
18115 GemmMicrokernelTester()
18116 .mr(4)
18117 .nr(8)
18118 .kr(1)
18119 .sr(1)
18120 .m(4)
18121 .n(n)
18122 .k(4)
18123 .iterations(1)
18124 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18125 }
18126 }
18127
18128 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_lt_4) {
18129 TEST_REQUIRES_X86_SSE2;
18130 for (size_t k = 1; k < 4; k++) {
18131 GemmMicrokernelTester()
18132 .mr(4)
18133 .nr(8)
18134 .kr(1)
18135 .sr(1)
18136 .m(4)
18137 .n(8)
18138 .k(k)
18139 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18140 }
18141 }
18142
18143 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_lt_4_subtile) {
18144 TEST_REQUIRES_X86_SSE2;
18145 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018146 for (uint32_t n = 1; n <= 8; n++) {
18147 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018148 GemmMicrokernelTester()
18149 .mr(4)
18150 .nr(8)
18151 .kr(1)
18152 .sr(1)
18153 .m(m)
18154 .n(n)
18155 .k(k)
18156 .iterations(1)
18157 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18158 }
18159 }
18160 }
18161 }
18162
18163 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_gt_4) {
18164 TEST_REQUIRES_X86_SSE2;
18165 for (size_t k = 5; k < 8; k++) {
18166 GemmMicrokernelTester()
18167 .mr(4)
18168 .nr(8)
18169 .kr(1)
18170 .sr(1)
18171 .m(4)
18172 .n(8)
18173 .k(k)
18174 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18175 }
18176 }
18177
18178 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_gt_4_subtile) {
18179 TEST_REQUIRES_X86_SSE2;
18180 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018181 for (uint32_t n = 1; n <= 8; n++) {
18182 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018183 GemmMicrokernelTester()
18184 .mr(4)
18185 .nr(8)
18186 .kr(1)
18187 .sr(1)
18188 .m(m)
18189 .n(n)
18190 .k(k)
18191 .iterations(1)
18192 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18193 }
18194 }
18195 }
18196 }
18197
18198 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_div_4) {
18199 TEST_REQUIRES_X86_SSE2;
18200 for (size_t k = 8; k <= 40; k += 4) {
18201 GemmMicrokernelTester()
18202 .mr(4)
18203 .nr(8)
18204 .kr(1)
18205 .sr(1)
18206 .m(4)
18207 .n(8)
18208 .k(k)
18209 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18210 }
18211 }
18212
18213 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, k_div_4_subtile) {
18214 TEST_REQUIRES_X86_SSE2;
18215 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018216 for (uint32_t n = 1; n <= 8; n++) {
18217 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018218 GemmMicrokernelTester()
18219 .mr(4)
18220 .nr(8)
18221 .kr(1)
18222 .sr(1)
18223 .m(m)
18224 .n(n)
18225 .k(k)
18226 .iterations(1)
18227 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18228 }
18229 }
18230 }
18231 }
18232
18233 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_gt_8) {
18234 TEST_REQUIRES_X86_SSE2;
18235 for (uint32_t n = 9; n < 16; n++) {
18236 for (size_t k = 1; k <= 20; k += 5) {
18237 GemmMicrokernelTester()
18238 .mr(4)
18239 .nr(8)
18240 .kr(1)
18241 .sr(1)
18242 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018243 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018244 .k(k)
18245 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18246 }
18247 }
18248 }
18249
18250 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_gt_8_strided_cn) {
18251 TEST_REQUIRES_X86_SSE2;
18252 for (uint32_t n = 9; n < 16; n++) {
18253 for (size_t k = 1; k <= 20; k += 5) {
18254 GemmMicrokernelTester()
18255 .mr(4)
18256 .nr(8)
18257 .kr(1)
18258 .sr(1)
18259 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018260 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018261 .k(k)
18262 .cn_stride(11)
18263 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18264 }
18265 }
18266 }
18267
18268 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_gt_8_subtile) {
18269 TEST_REQUIRES_X86_SSE2;
18270 for (uint32_t n = 9; n < 16; n++) {
18271 for (size_t k = 1; k <= 20; k += 5) {
18272 for (uint32_t m = 1; m <= 4; m++) {
18273 GemmMicrokernelTester()
18274 .mr(4)
18275 .nr(8)
18276 .kr(1)
18277 .sr(1)
18278 .m(m)
18279 .n(n)
18280 .k(k)
18281 .iterations(1)
18282 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18283 }
18284 }
18285 }
18286 }
18287
18288 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_div_8) {
18289 TEST_REQUIRES_X86_SSE2;
18290 for (uint32_t n = 16; n <= 24; n += 8) {
18291 for (size_t k = 1; k <= 20; k += 5) {
18292 GemmMicrokernelTester()
18293 .mr(4)
18294 .nr(8)
18295 .kr(1)
18296 .sr(1)
18297 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018298 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018299 .k(k)
18300 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18301 }
18302 }
18303 }
18304
18305 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_div_8_strided_cn) {
18306 TEST_REQUIRES_X86_SSE2;
18307 for (uint32_t n = 16; n <= 24; n += 8) {
18308 for (size_t k = 1; k <= 20; k += 5) {
18309 GemmMicrokernelTester()
18310 .mr(4)
18311 .nr(8)
18312 .kr(1)
18313 .sr(1)
18314 .m(4)
18315 .n(n)
18316 .k(k)
18317 .cn_stride(11)
18318 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18319 }
18320 }
18321 }
18322
18323 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_div_8_subtile) {
18324 TEST_REQUIRES_X86_SSE2;
18325 for (uint32_t n = 16; n <= 24; n += 8) {
18326 for (size_t k = 1; k <= 20; k += 5) {
18327 for (uint32_t m = 1; m <= 4; m++) {
18328 GemmMicrokernelTester()
18329 .mr(4)
18330 .nr(8)
18331 .kr(1)
18332 .sr(1)
18333 .m(m)
18334 .n(n)
18335 .k(k)
18336 .iterations(1)
18337 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18338 }
18339 }
18340 }
18341 }
18342
18343 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, small_kernel) {
18344 TEST_REQUIRES_X86_SSE2;
18345 for (size_t k = 1; k <= 20; k += 5) {
18346 GemmMicrokernelTester()
18347 .mr(4)
18348 .nr(8)
18349 .kr(1)
18350 .sr(1)
18351 .m(4)
18352 .n(8)
18353 .k(k)
18354 .ks(3)
18355 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18356 }
18357 }
18358
18359 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, small_kernel_subtile) {
18360 TEST_REQUIRES_X86_SSE2;
18361 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018362 for (uint32_t n = 1; n <= 8; n++) {
18363 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018364 GemmMicrokernelTester()
18365 .mr(4)
18366 .nr(8)
18367 .kr(1)
18368 .sr(1)
18369 .m(m)
18370 .n(n)
18371 .k(k)
18372 .ks(3)
18373 .iterations(1)
18374 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18375 }
18376 }
18377 }
18378 }
18379
18380 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_gt_8_small_kernel) {
18381 TEST_REQUIRES_X86_SSE2;
18382 for (uint32_t n = 9; n < 16; n++) {
18383 for (size_t k = 1; k <= 20; k += 5) {
18384 GemmMicrokernelTester()
18385 .mr(4)
18386 .nr(8)
18387 .kr(1)
18388 .sr(1)
18389 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018390 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018391 .k(k)
18392 .ks(3)
18393 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18394 }
18395 }
18396 }
18397
18398 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, n_div_8_small_kernel) {
18399 TEST_REQUIRES_X86_SSE2;
18400 for (uint32_t n = 16; n <= 24; n += 8) {
18401 for (size_t k = 1; k <= 20; k += 5) {
18402 GemmMicrokernelTester()
18403 .mr(4)
18404 .nr(8)
18405 .kr(1)
18406 .sr(1)
18407 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018408 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018409 .k(k)
18410 .ks(3)
18411 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18412 }
18413 }
18414 }
18415
18416 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, strided_cm_subtile) {
18417 TEST_REQUIRES_X86_SSE2;
18418 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018419 for (uint32_t n = 1; n <= 8; n++) {
18420 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018421 GemmMicrokernelTester()
18422 .mr(4)
18423 .nr(8)
18424 .kr(1)
18425 .sr(1)
18426 .m(m)
18427 .n(n)
18428 .k(k)
18429 .cm_stride(11)
18430 .iterations(1)
18431 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18432 }
18433 }
18434 }
18435 }
18436
18437 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, a_offset) {
18438 TEST_REQUIRES_X86_SSE2;
18439 for (size_t k = 1; k <= 20; k += 5) {
18440 GemmMicrokernelTester()
18441 .mr(4)
18442 .nr(8)
18443 .kr(1)
18444 .sr(1)
18445 .m(4)
18446 .n(8)
18447 .k(k)
18448 .ks(3)
18449 .a_offset(83)
18450 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18451 }
18452 }
18453
18454 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, zero) {
18455 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018456 for (size_t k = 1; k <= 20; k += 5) {
18457 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018458 GemmMicrokernelTester()
18459 .mr(4)
18460 .nr(8)
18461 .kr(1)
18462 .sr(1)
18463 .m(4)
18464 .n(8)
18465 .k(k)
18466 .ks(3)
18467 .a_offset(83)
18468 .zero_index(mz)
18469 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18470 }
18471 }
18472 }
18473
18474 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, qmin) {
18475 TEST_REQUIRES_X86_SSE2;
18476 GemmMicrokernelTester()
18477 .mr(4)
18478 .nr(8)
18479 .kr(1)
18480 .sr(1)
18481 .m(4)
18482 .n(8)
18483 .k(4)
18484 .qmin(128)
18485 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18486 }
18487
18488 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, qmax) {
18489 TEST_REQUIRES_X86_SSE2;
18490 GemmMicrokernelTester()
18491 .mr(4)
18492 .nr(8)
18493 .kr(1)
18494 .sr(1)
18495 .m(4)
18496 .n(8)
18497 .k(4)
18498 .qmax(128)
18499 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18500 }
18501
18502 TEST(F32_IGEMM_MINMAX_4X8__SSE2_DUP, strided_cm) {
18503 TEST_REQUIRES_X86_SSE2;
18504 GemmMicrokernelTester()
18505 .mr(4)
18506 .nr(8)
18507 .kr(1)
18508 .sr(1)
18509 .m(4)
18510 .n(8)
18511 .k(4)
18512 .cm_stride(11)
18513 .Test(xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, xnn_init_f32_minmax_sse_params);
18514 }
18515#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18516
18517
18518#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18519 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_eq_1) {
18520 TEST_REQUIRES_X86_AVX;
18521 GemmMicrokernelTester()
18522 .mr(4)
18523 .nr(8)
18524 .kr(1)
18525 .sr(1)
18526 .m(4)
18527 .n(8)
18528 .k(1)
18529 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18530 }
18531
18532 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, strided_cn) {
18533 TEST_REQUIRES_X86_AVX;
18534 GemmMicrokernelTester()
18535 .mr(4)
18536 .nr(8)
18537 .kr(1)
18538 .sr(1)
18539 .m(4)
18540 .n(8)
18541 .k(1)
18542 .cn_stride(11)
18543 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18544 }
18545
18546 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile) {
18547 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018548 for (uint32_t n = 1; n <= 8; n++) {
18549 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018550 GemmMicrokernelTester()
18551 .mr(4)
18552 .nr(8)
18553 .kr(1)
18554 .sr(1)
18555 .m(m)
18556 .n(n)
18557 .k(1)
18558 .iterations(1)
18559 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18560 }
18561 }
18562 }
18563
18564 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile_m) {
18565 TEST_REQUIRES_X86_AVX;
18566 for (uint32_t m = 1; m <= 4; m++) {
18567 GemmMicrokernelTester()
18568 .mr(4)
18569 .nr(8)
18570 .kr(1)
18571 .sr(1)
18572 .m(m)
18573 .n(8)
18574 .k(1)
18575 .iterations(1)
18576 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18577 }
18578 }
18579
18580 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_eq_1_subtile_n) {
18581 TEST_REQUIRES_X86_AVX;
18582 for (uint32_t n = 1; n <= 8; n++) {
18583 GemmMicrokernelTester()
18584 .mr(4)
18585 .nr(8)
18586 .kr(1)
18587 .sr(1)
18588 .m(4)
18589 .n(n)
18590 .k(1)
18591 .iterations(1)
18592 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18593 }
18594 }
18595
18596 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_gt_1) {
18597 TEST_REQUIRES_X86_AVX;
18598 for (size_t k = 2; k < 10; k++) {
18599 GemmMicrokernelTester()
18600 .mr(4)
18601 .nr(8)
18602 .kr(1)
18603 .sr(1)
18604 .m(4)
18605 .n(8)
18606 .k(k)
18607 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18608 }
18609 }
18610
18611 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, k_gt_1_subtile) {
18612 TEST_REQUIRES_X86_AVX;
18613 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018614 for (uint32_t n = 1; n <= 8; n++) {
18615 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018616 GemmMicrokernelTester()
18617 .mr(4)
18618 .nr(8)
18619 .kr(1)
18620 .sr(1)
18621 .m(m)
18622 .n(n)
18623 .k(k)
18624 .iterations(1)
18625 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18626 }
18627 }
18628 }
18629 }
18630
18631 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_gt_8) {
18632 TEST_REQUIRES_X86_AVX;
18633 for (uint32_t n = 9; n < 16; n++) {
18634 for (size_t k = 1; k <= 5; k += 2) {
18635 GemmMicrokernelTester()
18636 .mr(4)
18637 .nr(8)
18638 .kr(1)
18639 .sr(1)
18640 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018641 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018642 .k(k)
18643 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18644 }
18645 }
18646 }
18647
18648 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_gt_8_strided_cn) {
18649 TEST_REQUIRES_X86_AVX;
18650 for (uint32_t n = 9; n < 16; n++) {
18651 for (size_t k = 1; k <= 5; k += 2) {
18652 GemmMicrokernelTester()
18653 .mr(4)
18654 .nr(8)
18655 .kr(1)
18656 .sr(1)
18657 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018658 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018659 .k(k)
18660 .cn_stride(11)
18661 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18662 }
18663 }
18664 }
18665
18666 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_gt_8_subtile) {
18667 TEST_REQUIRES_X86_AVX;
18668 for (uint32_t n = 9; n < 16; n++) {
18669 for (size_t k = 1; k <= 5; k += 2) {
18670 for (uint32_t m = 1; m <= 4; m++) {
18671 GemmMicrokernelTester()
18672 .mr(4)
18673 .nr(8)
18674 .kr(1)
18675 .sr(1)
18676 .m(m)
18677 .n(n)
18678 .k(k)
18679 .iterations(1)
18680 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18681 }
18682 }
18683 }
18684 }
18685
18686 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_div_8) {
18687 TEST_REQUIRES_X86_AVX;
18688 for (uint32_t n = 16; n <= 24; n += 8) {
18689 for (size_t k = 1; k <= 5; k += 2) {
18690 GemmMicrokernelTester()
18691 .mr(4)
18692 .nr(8)
18693 .kr(1)
18694 .sr(1)
18695 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018696 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018697 .k(k)
18698 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18699 }
18700 }
18701 }
18702
18703 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_div_8_strided_cn) {
18704 TEST_REQUIRES_X86_AVX;
18705 for (uint32_t n = 16; n <= 24; n += 8) {
18706 for (size_t k = 1; k <= 5; k += 2) {
18707 GemmMicrokernelTester()
18708 .mr(4)
18709 .nr(8)
18710 .kr(1)
18711 .sr(1)
18712 .m(4)
18713 .n(n)
18714 .k(k)
18715 .cn_stride(11)
18716 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18717 }
18718 }
18719 }
18720
18721 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_div_8_subtile) {
18722 TEST_REQUIRES_X86_AVX;
18723 for (uint32_t n = 16; n <= 24; n += 8) {
18724 for (size_t k = 1; k <= 5; k += 2) {
18725 for (uint32_t m = 1; m <= 4; m++) {
18726 GemmMicrokernelTester()
18727 .mr(4)
18728 .nr(8)
18729 .kr(1)
18730 .sr(1)
18731 .m(m)
18732 .n(n)
18733 .k(k)
18734 .iterations(1)
18735 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18736 }
18737 }
18738 }
18739 }
18740
18741 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, small_kernel) {
18742 TEST_REQUIRES_X86_AVX;
18743 for (size_t k = 1; k <= 5; k += 2) {
18744 GemmMicrokernelTester()
18745 .mr(4)
18746 .nr(8)
18747 .kr(1)
18748 .sr(1)
18749 .m(4)
18750 .n(8)
18751 .k(k)
18752 .ks(3)
18753 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18754 }
18755 }
18756
18757 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, small_kernel_subtile) {
18758 TEST_REQUIRES_X86_AVX;
18759 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018760 for (uint32_t n = 1; n <= 8; n++) {
18761 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018762 GemmMicrokernelTester()
18763 .mr(4)
18764 .nr(8)
18765 .kr(1)
18766 .sr(1)
18767 .m(m)
18768 .n(n)
18769 .k(k)
18770 .ks(3)
18771 .iterations(1)
18772 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18773 }
18774 }
18775 }
18776 }
18777
18778 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_gt_8_small_kernel) {
18779 TEST_REQUIRES_X86_AVX;
18780 for (uint32_t n = 9; n < 16; n++) {
18781 for (size_t k = 1; k <= 5; k += 2) {
18782 GemmMicrokernelTester()
18783 .mr(4)
18784 .nr(8)
18785 .kr(1)
18786 .sr(1)
18787 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018788 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018789 .k(k)
18790 .ks(3)
18791 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18792 }
18793 }
18794 }
18795
18796 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, n_div_8_small_kernel) {
18797 TEST_REQUIRES_X86_AVX;
18798 for (uint32_t n = 16; n <= 24; n += 8) {
18799 for (size_t k = 1; k <= 5; k += 2) {
18800 GemmMicrokernelTester()
18801 .mr(4)
18802 .nr(8)
18803 .kr(1)
18804 .sr(1)
18805 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018806 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018807 .k(k)
18808 .ks(3)
18809 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18810 }
18811 }
18812 }
18813
18814 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, strided_cm_subtile) {
18815 TEST_REQUIRES_X86_AVX;
18816 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018817 for (uint32_t n = 1; n <= 8; n++) {
18818 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018819 GemmMicrokernelTester()
18820 .mr(4)
18821 .nr(8)
18822 .kr(1)
18823 .sr(1)
18824 .m(m)
18825 .n(n)
18826 .k(k)
18827 .cm_stride(11)
18828 .iterations(1)
18829 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18830 }
18831 }
18832 }
18833 }
18834
18835 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, a_offset) {
18836 TEST_REQUIRES_X86_AVX;
18837 for (size_t k = 1; k <= 5; k += 2) {
18838 GemmMicrokernelTester()
18839 .mr(4)
18840 .nr(8)
18841 .kr(1)
18842 .sr(1)
18843 .m(4)
18844 .n(8)
18845 .k(k)
18846 .ks(3)
18847 .a_offset(23)
18848 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18849 }
18850 }
18851
18852 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, zero) {
18853 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018854 for (size_t k = 1; k <= 5; k += 2) {
18855 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018856 GemmMicrokernelTester()
18857 .mr(4)
18858 .nr(8)
18859 .kr(1)
18860 .sr(1)
18861 .m(4)
18862 .n(8)
18863 .k(k)
18864 .ks(3)
18865 .a_offset(23)
18866 .zero_index(mz)
18867 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18868 }
18869 }
18870 }
18871
18872 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, qmin) {
18873 TEST_REQUIRES_X86_AVX;
18874 GemmMicrokernelTester()
18875 .mr(4)
18876 .nr(8)
18877 .kr(1)
18878 .sr(1)
18879 .m(4)
18880 .n(8)
18881 .k(1)
18882 .qmin(128)
18883 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18884 }
18885
18886 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, qmax) {
18887 TEST_REQUIRES_X86_AVX;
18888 GemmMicrokernelTester()
18889 .mr(4)
18890 .nr(8)
18891 .kr(1)
18892 .sr(1)
18893 .m(4)
18894 .n(8)
18895 .k(1)
18896 .qmax(128)
18897 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18898 }
18899
18900 TEST(F32_IGEMM_MINMAX_4X8__AVX_BROADCAST, strided_cm) {
18901 TEST_REQUIRES_X86_AVX;
18902 GemmMicrokernelTester()
18903 .mr(4)
18904 .nr(8)
18905 .kr(1)
18906 .sr(1)
18907 .m(4)
18908 .n(8)
18909 .k(1)
18910 .cm_stride(11)
18911 .Test(xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18912 }
18913#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18914
18915
18916#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18917 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_eq_1) {
18918 TEST_REQUIRES_X86_AVX;
18919 GemmMicrokernelTester()
18920 .mr(7)
18921 .nr(8)
18922 .kr(1)
18923 .sr(1)
18924 .m(7)
18925 .n(8)
18926 .k(1)
18927 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18928 }
18929
18930 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, strided_cn) {
18931 TEST_REQUIRES_X86_AVX;
18932 GemmMicrokernelTester()
18933 .mr(7)
18934 .nr(8)
18935 .kr(1)
18936 .sr(1)
18937 .m(7)
18938 .n(8)
18939 .k(1)
18940 .cn_stride(11)
18941 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18942 }
18943
18944 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile) {
18945 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018946 for (uint32_t n = 1; n <= 8; n++) {
18947 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018948 GemmMicrokernelTester()
18949 .mr(7)
18950 .nr(8)
18951 .kr(1)
18952 .sr(1)
18953 .m(m)
18954 .n(n)
18955 .k(1)
18956 .iterations(1)
18957 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18958 }
18959 }
18960 }
18961
18962 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile_m) {
18963 TEST_REQUIRES_X86_AVX;
18964 for (uint32_t m = 1; m <= 7; m++) {
18965 GemmMicrokernelTester()
18966 .mr(7)
18967 .nr(8)
18968 .kr(1)
18969 .sr(1)
18970 .m(m)
18971 .n(8)
18972 .k(1)
18973 .iterations(1)
18974 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18975 }
18976 }
18977
18978 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_eq_1_subtile_n) {
18979 TEST_REQUIRES_X86_AVX;
18980 for (uint32_t n = 1; n <= 8; n++) {
18981 GemmMicrokernelTester()
18982 .mr(7)
18983 .nr(8)
18984 .kr(1)
18985 .sr(1)
18986 .m(7)
18987 .n(n)
18988 .k(1)
18989 .iterations(1)
18990 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
18991 }
18992 }
18993
18994 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_gt_1) {
18995 TEST_REQUIRES_X86_AVX;
18996 for (size_t k = 2; k < 10; k++) {
18997 GemmMicrokernelTester()
18998 .mr(7)
18999 .nr(8)
19000 .kr(1)
19001 .sr(1)
19002 .m(7)
19003 .n(8)
19004 .k(k)
19005 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19006 }
19007 }
19008
19009 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, k_gt_1_subtile) {
19010 TEST_REQUIRES_X86_AVX;
19011 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019012 for (uint32_t n = 1; n <= 8; n++) {
19013 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019014 GemmMicrokernelTester()
19015 .mr(7)
19016 .nr(8)
19017 .kr(1)
19018 .sr(1)
19019 .m(m)
19020 .n(n)
19021 .k(k)
19022 .iterations(1)
19023 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19024 }
19025 }
19026 }
19027 }
19028
19029 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_gt_8) {
19030 TEST_REQUIRES_X86_AVX;
19031 for (uint32_t n = 9; n < 16; n++) {
19032 for (size_t k = 1; k <= 5; k += 2) {
19033 GemmMicrokernelTester()
19034 .mr(7)
19035 .nr(8)
19036 .kr(1)
19037 .sr(1)
19038 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019039 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019040 .k(k)
19041 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19042 }
19043 }
19044 }
19045
19046 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_gt_8_strided_cn) {
19047 TEST_REQUIRES_X86_AVX;
19048 for (uint32_t n = 9; n < 16; n++) {
19049 for (size_t k = 1; k <= 5; k += 2) {
19050 GemmMicrokernelTester()
19051 .mr(7)
19052 .nr(8)
19053 .kr(1)
19054 .sr(1)
19055 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019056 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019057 .k(k)
19058 .cn_stride(11)
19059 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19060 }
19061 }
19062 }
19063
19064 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_gt_8_subtile) {
19065 TEST_REQUIRES_X86_AVX;
19066 for (uint32_t n = 9; n < 16; n++) {
19067 for (size_t k = 1; k <= 5; k += 2) {
19068 for (uint32_t m = 1; m <= 7; m++) {
19069 GemmMicrokernelTester()
19070 .mr(7)
19071 .nr(8)
19072 .kr(1)
19073 .sr(1)
19074 .m(m)
19075 .n(n)
19076 .k(k)
19077 .iterations(1)
19078 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19079 }
19080 }
19081 }
19082 }
19083
19084 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_div_8) {
19085 TEST_REQUIRES_X86_AVX;
19086 for (uint32_t n = 16; n <= 24; n += 8) {
19087 for (size_t k = 1; k <= 5; k += 2) {
19088 GemmMicrokernelTester()
19089 .mr(7)
19090 .nr(8)
19091 .kr(1)
19092 .sr(1)
19093 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019094 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019095 .k(k)
19096 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19097 }
19098 }
19099 }
19100
19101 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_div_8_strided_cn) {
19102 TEST_REQUIRES_X86_AVX;
19103 for (uint32_t n = 16; n <= 24; n += 8) {
19104 for (size_t k = 1; k <= 5; k += 2) {
19105 GemmMicrokernelTester()
19106 .mr(7)
19107 .nr(8)
19108 .kr(1)
19109 .sr(1)
19110 .m(7)
19111 .n(n)
19112 .k(k)
19113 .cn_stride(11)
19114 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19115 }
19116 }
19117 }
19118
19119 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_div_8_subtile) {
19120 TEST_REQUIRES_X86_AVX;
19121 for (uint32_t n = 16; n <= 24; n += 8) {
19122 for (size_t k = 1; k <= 5; k += 2) {
19123 for (uint32_t m = 1; m <= 7; m++) {
19124 GemmMicrokernelTester()
19125 .mr(7)
19126 .nr(8)
19127 .kr(1)
19128 .sr(1)
19129 .m(m)
19130 .n(n)
19131 .k(k)
19132 .iterations(1)
19133 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19134 }
19135 }
19136 }
19137 }
19138
19139 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, small_kernel) {
19140 TEST_REQUIRES_X86_AVX;
19141 for (size_t k = 1; k <= 5; k += 2) {
19142 GemmMicrokernelTester()
19143 .mr(7)
19144 .nr(8)
19145 .kr(1)
19146 .sr(1)
19147 .m(7)
19148 .n(8)
19149 .k(k)
19150 .ks(3)
19151 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19152 }
19153 }
19154
19155 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, small_kernel_subtile) {
19156 TEST_REQUIRES_X86_AVX;
19157 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019158 for (uint32_t n = 1; n <= 8; n++) {
19159 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019160 GemmMicrokernelTester()
19161 .mr(7)
19162 .nr(8)
19163 .kr(1)
19164 .sr(1)
19165 .m(m)
19166 .n(n)
19167 .k(k)
19168 .ks(3)
19169 .iterations(1)
19170 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19171 }
19172 }
19173 }
19174 }
19175
19176 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_gt_8_small_kernel) {
19177 TEST_REQUIRES_X86_AVX;
19178 for (uint32_t n = 9; n < 16; n++) {
19179 for (size_t k = 1; k <= 5; k += 2) {
19180 GemmMicrokernelTester()
19181 .mr(7)
19182 .nr(8)
19183 .kr(1)
19184 .sr(1)
19185 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019186 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019187 .k(k)
19188 .ks(3)
19189 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19190 }
19191 }
19192 }
19193
19194 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, n_div_8_small_kernel) {
19195 TEST_REQUIRES_X86_AVX;
19196 for (uint32_t n = 16; n <= 24; n += 8) {
19197 for (size_t k = 1; k <= 5; k += 2) {
19198 GemmMicrokernelTester()
19199 .mr(7)
19200 .nr(8)
19201 .kr(1)
19202 .sr(1)
19203 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019204 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019205 .k(k)
19206 .ks(3)
19207 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19208 }
19209 }
19210 }
19211
19212 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, strided_cm_subtile) {
19213 TEST_REQUIRES_X86_AVX;
19214 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019215 for (uint32_t n = 1; n <= 8; n++) {
19216 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019217 GemmMicrokernelTester()
19218 .mr(7)
19219 .nr(8)
19220 .kr(1)
19221 .sr(1)
19222 .m(m)
19223 .n(n)
19224 .k(k)
19225 .cm_stride(11)
19226 .iterations(1)
19227 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19228 }
19229 }
19230 }
19231 }
19232
19233 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, a_offset) {
19234 TEST_REQUIRES_X86_AVX;
19235 for (size_t k = 1; k <= 5; k += 2) {
19236 GemmMicrokernelTester()
19237 .mr(7)
19238 .nr(8)
19239 .kr(1)
19240 .sr(1)
19241 .m(7)
19242 .n(8)
19243 .k(k)
19244 .ks(3)
19245 .a_offset(37)
19246 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19247 }
19248 }
19249
19250 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, zero) {
19251 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019252 for (size_t k = 1; k <= 5; k += 2) {
19253 for (uint32_t mz = 0; mz < 7; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019254 GemmMicrokernelTester()
19255 .mr(7)
19256 .nr(8)
19257 .kr(1)
19258 .sr(1)
19259 .m(7)
19260 .n(8)
19261 .k(k)
19262 .ks(3)
19263 .a_offset(37)
19264 .zero_index(mz)
19265 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19266 }
19267 }
19268 }
19269
19270 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, qmin) {
19271 TEST_REQUIRES_X86_AVX;
19272 GemmMicrokernelTester()
19273 .mr(7)
19274 .nr(8)
19275 .kr(1)
19276 .sr(1)
19277 .m(7)
19278 .n(8)
19279 .k(1)
19280 .qmin(128)
19281 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19282 }
19283
19284 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, qmax) {
19285 TEST_REQUIRES_X86_AVX;
19286 GemmMicrokernelTester()
19287 .mr(7)
19288 .nr(8)
19289 .kr(1)
19290 .sr(1)
19291 .m(7)
19292 .n(8)
19293 .k(1)
19294 .qmax(128)
19295 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19296 }
19297
19298 TEST(F32_IGEMM_MINMAX_7X8__AVX_BROADCAST, strided_cm) {
19299 TEST_REQUIRES_X86_AVX;
19300 GemmMicrokernelTester()
19301 .mr(7)
19302 .nr(8)
19303 .kr(1)
19304 .sr(1)
19305 .m(7)
19306 .n(8)
19307 .k(1)
19308 .cm_stride(11)
19309 .Test(xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, xnn_init_f32_minmax_avx_params);
19310 }
19311#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19312
19313
19314#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19315 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1) {
19316 TEST_REQUIRES_X86_AVX;
19317 GemmMicrokernelTester()
19318 .mr(3)
19319 .nr(16)
19320 .kr(1)
19321 .sr(1)
19322 .m(3)
19323 .n(16)
19324 .k(1)
19325 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19326 }
19327
19328 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, strided_cn) {
19329 TEST_REQUIRES_X86_AVX;
19330 GemmMicrokernelTester()
19331 .mr(3)
19332 .nr(16)
19333 .kr(1)
19334 .sr(1)
19335 .m(3)
19336 .n(16)
19337 .k(1)
19338 .cn_stride(19)
19339 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19340 }
19341
19342 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile) {
19343 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019344 for (uint32_t n = 1; n <= 16; n++) {
19345 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019346 GemmMicrokernelTester()
19347 .mr(3)
19348 .nr(16)
19349 .kr(1)
19350 .sr(1)
19351 .m(m)
19352 .n(n)
19353 .k(1)
19354 .iterations(1)
19355 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19356 }
19357 }
19358 }
19359
19360 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile_m) {
19361 TEST_REQUIRES_X86_AVX;
19362 for (uint32_t m = 1; m <= 3; m++) {
19363 GemmMicrokernelTester()
19364 .mr(3)
19365 .nr(16)
19366 .kr(1)
19367 .sr(1)
19368 .m(m)
19369 .n(16)
19370 .k(1)
19371 .iterations(1)
19372 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19373 }
19374 }
19375
19376 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_eq_1_subtile_n) {
19377 TEST_REQUIRES_X86_AVX;
19378 for (uint32_t n = 1; n <= 16; n++) {
19379 GemmMicrokernelTester()
19380 .mr(3)
19381 .nr(16)
19382 .kr(1)
19383 .sr(1)
19384 .m(3)
19385 .n(n)
19386 .k(1)
19387 .iterations(1)
19388 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19389 }
19390 }
19391
19392 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_gt_1) {
19393 TEST_REQUIRES_X86_AVX;
19394 for (size_t k = 2; k < 10; k++) {
19395 GemmMicrokernelTester()
19396 .mr(3)
19397 .nr(16)
19398 .kr(1)
19399 .sr(1)
19400 .m(3)
19401 .n(16)
19402 .k(k)
19403 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19404 }
19405 }
19406
19407 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, k_gt_1_subtile) {
19408 TEST_REQUIRES_X86_AVX;
19409 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019410 for (uint32_t n = 1; n <= 16; n++) {
19411 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019412 GemmMicrokernelTester()
19413 .mr(3)
19414 .nr(16)
19415 .kr(1)
19416 .sr(1)
19417 .m(m)
19418 .n(n)
19419 .k(k)
19420 .iterations(1)
19421 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19422 }
19423 }
19424 }
19425 }
19426
19427 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16) {
19428 TEST_REQUIRES_X86_AVX;
19429 for (uint32_t n = 17; n < 32; n++) {
19430 for (size_t k = 1; k <= 5; k += 2) {
19431 GemmMicrokernelTester()
19432 .mr(3)
19433 .nr(16)
19434 .kr(1)
19435 .sr(1)
19436 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019437 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019438 .k(k)
19439 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19440 }
19441 }
19442 }
19443
19444 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16_strided_cn) {
19445 TEST_REQUIRES_X86_AVX;
19446 for (uint32_t n = 17; n < 32; n++) {
19447 for (size_t k = 1; k <= 5; k += 2) {
19448 GemmMicrokernelTester()
19449 .mr(3)
19450 .nr(16)
19451 .kr(1)
19452 .sr(1)
19453 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019454 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019455 .k(k)
19456 .cn_stride(19)
19457 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19458 }
19459 }
19460 }
19461
19462 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16_subtile) {
19463 TEST_REQUIRES_X86_AVX;
19464 for (uint32_t n = 17; n < 32; n++) {
19465 for (size_t k = 1; k <= 5; k += 2) {
19466 for (uint32_t m = 1; m <= 3; m++) {
19467 GemmMicrokernelTester()
19468 .mr(3)
19469 .nr(16)
19470 .kr(1)
19471 .sr(1)
19472 .m(m)
19473 .n(n)
19474 .k(k)
19475 .iterations(1)
19476 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19477 }
19478 }
19479 }
19480 }
19481
19482 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16) {
19483 TEST_REQUIRES_X86_AVX;
19484 for (uint32_t n = 32; n <= 48; n += 16) {
19485 for (size_t k = 1; k <= 5; k += 2) {
19486 GemmMicrokernelTester()
19487 .mr(3)
19488 .nr(16)
19489 .kr(1)
19490 .sr(1)
19491 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019492 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019493 .k(k)
19494 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19495 }
19496 }
19497 }
19498
19499 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16_strided_cn) {
19500 TEST_REQUIRES_X86_AVX;
19501 for (uint32_t n = 32; n <= 48; n += 16) {
19502 for (size_t k = 1; k <= 5; k += 2) {
19503 GemmMicrokernelTester()
19504 .mr(3)
19505 .nr(16)
19506 .kr(1)
19507 .sr(1)
19508 .m(3)
19509 .n(n)
19510 .k(k)
19511 .cn_stride(19)
19512 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19513 }
19514 }
19515 }
19516
19517 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16_subtile) {
19518 TEST_REQUIRES_X86_AVX;
19519 for (uint32_t n = 32; n <= 48; n += 16) {
19520 for (size_t k = 1; k <= 5; k += 2) {
19521 for (uint32_t m = 1; m <= 3; m++) {
19522 GemmMicrokernelTester()
19523 .mr(3)
19524 .nr(16)
19525 .kr(1)
19526 .sr(1)
19527 .m(m)
19528 .n(n)
19529 .k(k)
19530 .iterations(1)
19531 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19532 }
19533 }
19534 }
19535 }
19536
19537 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, small_kernel) {
19538 TEST_REQUIRES_X86_AVX;
19539 for (size_t k = 1; k <= 5; k += 2) {
19540 GemmMicrokernelTester()
19541 .mr(3)
19542 .nr(16)
19543 .kr(1)
19544 .sr(1)
19545 .m(3)
19546 .n(16)
19547 .k(k)
19548 .ks(3)
19549 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19550 }
19551 }
19552
19553 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, small_kernel_subtile) {
19554 TEST_REQUIRES_X86_AVX;
19555 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019556 for (uint32_t n = 1; n <= 16; n++) {
19557 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019558 GemmMicrokernelTester()
19559 .mr(3)
19560 .nr(16)
19561 .kr(1)
19562 .sr(1)
19563 .m(m)
19564 .n(n)
19565 .k(k)
19566 .ks(3)
19567 .iterations(1)
19568 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19569 }
19570 }
19571 }
19572 }
19573
19574 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_gt_16_small_kernel) {
19575 TEST_REQUIRES_X86_AVX;
19576 for (uint32_t n = 17; n < 32; n++) {
19577 for (size_t k = 1; k <= 5; k += 2) {
19578 GemmMicrokernelTester()
19579 .mr(3)
19580 .nr(16)
19581 .kr(1)
19582 .sr(1)
19583 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019584 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019585 .k(k)
19586 .ks(3)
19587 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19588 }
19589 }
19590 }
19591
19592 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, n_div_16_small_kernel) {
19593 TEST_REQUIRES_X86_AVX;
19594 for (uint32_t n = 32; n <= 48; n += 16) {
19595 for (size_t k = 1; k <= 5; k += 2) {
19596 GemmMicrokernelTester()
19597 .mr(3)
19598 .nr(16)
19599 .kr(1)
19600 .sr(1)
19601 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019602 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019603 .k(k)
19604 .ks(3)
19605 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19606 }
19607 }
19608 }
19609
19610 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, strided_cm_subtile) {
19611 TEST_REQUIRES_X86_AVX;
19612 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019613 for (uint32_t n = 1; n <= 16; n++) {
19614 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019615 GemmMicrokernelTester()
19616 .mr(3)
19617 .nr(16)
19618 .kr(1)
19619 .sr(1)
19620 .m(m)
19621 .n(n)
19622 .k(k)
19623 .cm_stride(19)
19624 .iterations(1)
19625 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19626 }
19627 }
19628 }
19629 }
19630
19631 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, a_offset) {
19632 TEST_REQUIRES_X86_AVX;
19633 for (size_t k = 1; k <= 5; k += 2) {
19634 GemmMicrokernelTester()
19635 .mr(3)
19636 .nr(16)
19637 .kr(1)
19638 .sr(1)
19639 .m(3)
19640 .n(16)
19641 .k(k)
19642 .ks(3)
19643 .a_offset(17)
19644 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19645 }
19646 }
19647
19648 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, zero) {
19649 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019650 for (size_t k = 1; k <= 5; k += 2) {
19651 for (uint32_t mz = 0; mz < 3; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019652 GemmMicrokernelTester()
19653 .mr(3)
19654 .nr(16)
19655 .kr(1)
19656 .sr(1)
19657 .m(3)
19658 .n(16)
19659 .k(k)
19660 .ks(3)
19661 .a_offset(17)
19662 .zero_index(mz)
19663 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19664 }
19665 }
19666 }
19667
19668 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, qmin) {
19669 TEST_REQUIRES_X86_AVX;
19670 GemmMicrokernelTester()
19671 .mr(3)
19672 .nr(16)
19673 .kr(1)
19674 .sr(1)
19675 .m(3)
19676 .n(16)
19677 .k(1)
19678 .qmin(128)
19679 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19680 }
19681
19682 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, qmax) {
19683 TEST_REQUIRES_X86_AVX;
19684 GemmMicrokernelTester()
19685 .mr(3)
19686 .nr(16)
19687 .kr(1)
19688 .sr(1)
19689 .m(3)
19690 .n(16)
19691 .k(1)
19692 .qmax(128)
19693 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19694 }
19695
19696 TEST(F32_IGEMM_MINMAX_3X16__AVX_BROADCAST, strided_cm) {
19697 TEST_REQUIRES_X86_AVX;
19698 GemmMicrokernelTester()
19699 .mr(3)
19700 .nr(16)
19701 .kr(1)
19702 .sr(1)
19703 .m(3)
19704 .n(16)
19705 .k(1)
19706 .cm_stride(19)
19707 .Test(xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast, xnn_init_f32_minmax_avx_params);
19708 }
19709#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19710
19711
19712#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19713 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1) {
19714 TEST_REQUIRES_X86_FMA3;
19715 GemmMicrokernelTester()
19716 .mr(1)
19717 .nr(8)
19718 .kr(1)
19719 .sr(1)
19720 .m(1)
19721 .n(8)
19722 .k(1)
19723 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19724 }
19725
19726 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, strided_cn) {
19727 TEST_REQUIRES_X86_FMA3;
19728 GemmMicrokernelTester()
19729 .mr(1)
19730 .nr(8)
19731 .kr(1)
19732 .sr(1)
19733 .m(1)
19734 .n(8)
19735 .k(1)
19736 .cn_stride(11)
19737 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19738 }
19739
19740 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile) {
19741 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019742 for (uint32_t n = 1; n <= 8; n++) {
19743 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019744 GemmMicrokernelTester()
19745 .mr(1)
19746 .nr(8)
19747 .kr(1)
19748 .sr(1)
19749 .m(m)
19750 .n(n)
19751 .k(1)
19752 .iterations(1)
19753 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19754 }
19755 }
19756 }
19757
19758 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
19759 TEST_REQUIRES_X86_FMA3;
19760 for (uint32_t m = 1; m <= 1; m++) {
19761 GemmMicrokernelTester()
19762 .mr(1)
19763 .nr(8)
19764 .kr(1)
19765 .sr(1)
19766 .m(m)
19767 .n(8)
19768 .k(1)
19769 .iterations(1)
19770 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19771 }
19772 }
19773
19774 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
19775 TEST_REQUIRES_X86_FMA3;
19776 for (uint32_t n = 1; n <= 8; n++) {
19777 GemmMicrokernelTester()
19778 .mr(1)
19779 .nr(8)
19780 .kr(1)
19781 .sr(1)
19782 .m(1)
19783 .n(n)
19784 .k(1)
19785 .iterations(1)
19786 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19787 }
19788 }
19789
19790 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_gt_1) {
19791 TEST_REQUIRES_X86_FMA3;
19792 for (size_t k = 2; k < 10; k++) {
19793 GemmMicrokernelTester()
19794 .mr(1)
19795 .nr(8)
19796 .kr(1)
19797 .sr(1)
19798 .m(1)
19799 .n(8)
19800 .k(k)
19801 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19802 }
19803 }
19804
19805 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, k_gt_1_subtile) {
19806 TEST_REQUIRES_X86_FMA3;
19807 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019808 for (uint32_t n = 1; n <= 8; n++) {
19809 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019810 GemmMicrokernelTester()
19811 .mr(1)
19812 .nr(8)
19813 .kr(1)
19814 .sr(1)
19815 .m(m)
19816 .n(n)
19817 .k(k)
19818 .iterations(1)
19819 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19820 }
19821 }
19822 }
19823 }
19824
19825 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8) {
19826 TEST_REQUIRES_X86_FMA3;
19827 for (uint32_t n = 9; n < 16; n++) {
19828 for (size_t k = 1; k <= 5; k += 2) {
19829 GemmMicrokernelTester()
19830 .mr(1)
19831 .nr(8)
19832 .kr(1)
19833 .sr(1)
19834 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019835 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019836 .k(k)
19837 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19838 }
19839 }
19840 }
19841
19842 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
19843 TEST_REQUIRES_X86_FMA3;
19844 for (uint32_t n = 9; n < 16; n++) {
19845 for (size_t k = 1; k <= 5; k += 2) {
19846 GemmMicrokernelTester()
19847 .mr(1)
19848 .nr(8)
19849 .kr(1)
19850 .sr(1)
19851 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019852 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019853 .k(k)
19854 .cn_stride(11)
19855 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19856 }
19857 }
19858 }
19859
19860 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_subtile) {
19861 TEST_REQUIRES_X86_FMA3;
19862 for (uint32_t n = 9; n < 16; n++) {
19863 for (size_t k = 1; k <= 5; k += 2) {
19864 for (uint32_t m = 1; m <= 1; m++) {
19865 GemmMicrokernelTester()
19866 .mr(1)
19867 .nr(8)
19868 .kr(1)
19869 .sr(1)
19870 .m(m)
19871 .n(n)
19872 .k(k)
19873 .iterations(1)
19874 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19875 }
19876 }
19877 }
19878 }
19879
19880 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8) {
19881 TEST_REQUIRES_X86_FMA3;
19882 for (uint32_t n = 16; n <= 24; n += 8) {
19883 for (size_t k = 1; k <= 5; k += 2) {
19884 GemmMicrokernelTester()
19885 .mr(1)
19886 .nr(8)
19887 .kr(1)
19888 .sr(1)
19889 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019890 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019891 .k(k)
19892 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19893 }
19894 }
19895 }
19896
19897 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8_strided_cn) {
19898 TEST_REQUIRES_X86_FMA3;
19899 for (uint32_t n = 16; n <= 24; n += 8) {
19900 for (size_t k = 1; k <= 5; k += 2) {
19901 GemmMicrokernelTester()
19902 .mr(1)
19903 .nr(8)
19904 .kr(1)
19905 .sr(1)
19906 .m(1)
19907 .n(n)
19908 .k(k)
19909 .cn_stride(11)
19910 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19911 }
19912 }
19913 }
19914
19915 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8_subtile) {
19916 TEST_REQUIRES_X86_FMA3;
19917 for (uint32_t n = 16; n <= 24; n += 8) {
19918 for (size_t k = 1; k <= 5; k += 2) {
19919 for (uint32_t m = 1; m <= 1; m++) {
19920 GemmMicrokernelTester()
19921 .mr(1)
19922 .nr(8)
19923 .kr(1)
19924 .sr(1)
19925 .m(m)
19926 .n(n)
19927 .k(k)
19928 .iterations(1)
19929 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19930 }
19931 }
19932 }
19933 }
19934
19935 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, small_kernel) {
19936 TEST_REQUIRES_X86_FMA3;
19937 for (size_t k = 1; k <= 5; k += 2) {
19938 GemmMicrokernelTester()
19939 .mr(1)
19940 .nr(8)
19941 .kr(1)
19942 .sr(1)
19943 .m(1)
19944 .n(8)
19945 .k(k)
19946 .ks(3)
19947 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19948 }
19949 }
19950
19951 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, small_kernel_subtile) {
19952 TEST_REQUIRES_X86_FMA3;
19953 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019954 for (uint32_t n = 1; n <= 8; n++) {
19955 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019956 GemmMicrokernelTester()
19957 .mr(1)
19958 .nr(8)
19959 .kr(1)
19960 .sr(1)
19961 .m(m)
19962 .n(n)
19963 .k(k)
19964 .ks(3)
19965 .iterations(1)
19966 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19967 }
19968 }
19969 }
19970 }
19971
19972 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
19973 TEST_REQUIRES_X86_FMA3;
19974 for (uint32_t n = 9; n < 16; n++) {
19975 for (size_t k = 1; k <= 5; k += 2) {
19976 GemmMicrokernelTester()
19977 .mr(1)
19978 .nr(8)
19979 .kr(1)
19980 .sr(1)
19981 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019982 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019983 .k(k)
19984 .ks(3)
19985 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
19986 }
19987 }
19988 }
19989
19990 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, n_div_8_small_kernel) {
19991 TEST_REQUIRES_X86_FMA3;
19992 for (uint32_t n = 16; n <= 24; n += 8) {
19993 for (size_t k = 1; k <= 5; k += 2) {
19994 GemmMicrokernelTester()
19995 .mr(1)
19996 .nr(8)
19997 .kr(1)
19998 .sr(1)
19999 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020000 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020001 .k(k)
20002 .ks(3)
20003 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20004 }
20005 }
20006 }
20007
20008 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, strided_cm_subtile) {
20009 TEST_REQUIRES_X86_FMA3;
20010 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020011 for (uint32_t n = 1; n <= 8; n++) {
20012 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020013 GemmMicrokernelTester()
20014 .mr(1)
20015 .nr(8)
20016 .kr(1)
20017 .sr(1)
20018 .m(m)
20019 .n(n)
20020 .k(k)
20021 .cm_stride(11)
20022 .iterations(1)
20023 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20024 }
20025 }
20026 }
20027 }
20028
20029 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, a_offset) {
20030 TEST_REQUIRES_X86_FMA3;
20031 for (size_t k = 1; k <= 5; k += 2) {
20032 GemmMicrokernelTester()
20033 .mr(1)
20034 .nr(8)
20035 .kr(1)
20036 .sr(1)
20037 .m(1)
20038 .n(8)
20039 .k(k)
20040 .ks(3)
20041 .a_offset(7)
20042 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20043 }
20044 }
20045
20046 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, zero) {
20047 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020048 for (size_t k = 1; k <= 5; k += 2) {
20049 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020050 GemmMicrokernelTester()
20051 .mr(1)
20052 .nr(8)
20053 .kr(1)
20054 .sr(1)
20055 .m(1)
20056 .n(8)
20057 .k(k)
20058 .ks(3)
20059 .a_offset(7)
20060 .zero_index(mz)
20061 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20062 }
20063 }
20064 }
20065
20066 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, qmin) {
20067 TEST_REQUIRES_X86_FMA3;
20068 GemmMicrokernelTester()
20069 .mr(1)
20070 .nr(8)
20071 .kr(1)
20072 .sr(1)
20073 .m(1)
20074 .n(8)
20075 .k(1)
20076 .qmin(128)
20077 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20078 }
20079
20080 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, qmax) {
20081 TEST_REQUIRES_X86_FMA3;
20082 GemmMicrokernelTester()
20083 .mr(1)
20084 .nr(8)
20085 .kr(1)
20086 .sr(1)
20087 .m(1)
20088 .n(8)
20089 .k(1)
20090 .qmax(128)
20091 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20092 }
20093
20094 TEST(F32_IGEMM_MINMAX_1X8__FMA3_BROADCAST, strided_cm) {
20095 TEST_REQUIRES_X86_FMA3;
20096 GemmMicrokernelTester()
20097 .mr(1)
20098 .nr(8)
20099 .kr(1)
20100 .sr(1)
20101 .m(1)
20102 .n(8)
20103 .k(1)
20104 .cm_stride(11)
20105 .Test(xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20106 }
20107#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20108
20109
20110#if XNN_ARCH_X86 || XNN_ARCH_X86_64
20111 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1) {
20112 TEST_REQUIRES_X86_FMA3;
20113 GemmMicrokernelTester()
20114 .mr(4)
20115 .nr(8)
20116 .kr(1)
20117 .sr(1)
20118 .m(4)
20119 .n(8)
20120 .k(1)
20121 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20122 }
20123
20124 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, strided_cn) {
20125 TEST_REQUIRES_X86_FMA3;
20126 GemmMicrokernelTester()
20127 .mr(4)
20128 .nr(8)
20129 .kr(1)
20130 .sr(1)
20131 .m(4)
20132 .n(8)
20133 .k(1)
20134 .cn_stride(11)
20135 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20136 }
20137
20138 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile) {
20139 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020140 for (uint32_t n = 1; n <= 8; n++) {
20141 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020142 GemmMicrokernelTester()
20143 .mr(4)
20144 .nr(8)
20145 .kr(1)
20146 .sr(1)
20147 .m(m)
20148 .n(n)
20149 .k(1)
20150 .iterations(1)
20151 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20152 }
20153 }
20154 }
20155
20156 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
20157 TEST_REQUIRES_X86_FMA3;
20158 for (uint32_t m = 1; m <= 4; m++) {
20159 GemmMicrokernelTester()
20160 .mr(4)
20161 .nr(8)
20162 .kr(1)
20163 .sr(1)
20164 .m(m)
20165 .n(8)
20166 .k(1)
20167 .iterations(1)
20168 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20169 }
20170 }
20171
20172 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
20173 TEST_REQUIRES_X86_FMA3;
20174 for (uint32_t n = 1; n <= 8; n++) {
20175 GemmMicrokernelTester()
20176 .mr(4)
20177 .nr(8)
20178 .kr(1)
20179 .sr(1)
20180 .m(4)
20181 .n(n)
20182 .k(1)
20183 .iterations(1)
20184 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20185 }
20186 }
20187
20188 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_gt_1) {
20189 TEST_REQUIRES_X86_FMA3;
20190 for (size_t k = 2; k < 10; k++) {
20191 GemmMicrokernelTester()
20192 .mr(4)
20193 .nr(8)
20194 .kr(1)
20195 .sr(1)
20196 .m(4)
20197 .n(8)
20198 .k(k)
20199 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20200 }
20201 }
20202
20203 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, k_gt_1_subtile) {
20204 TEST_REQUIRES_X86_FMA3;
20205 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020206 for (uint32_t n = 1; n <= 8; n++) {
20207 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020208 GemmMicrokernelTester()
20209 .mr(4)
20210 .nr(8)
20211 .kr(1)
20212 .sr(1)
20213 .m(m)
20214 .n(n)
20215 .k(k)
20216 .iterations(1)
20217 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20218 }
20219 }
20220 }
20221 }
20222
20223 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8) {
20224 TEST_REQUIRES_X86_FMA3;
20225 for (uint32_t n = 9; n < 16; n++) {
20226 for (size_t k = 1; k <= 5; k += 2) {
20227 GemmMicrokernelTester()
20228 .mr(4)
20229 .nr(8)
20230 .kr(1)
20231 .sr(1)
20232 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020233 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020234 .k(k)
20235 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20236 }
20237 }
20238 }
20239
20240 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
20241 TEST_REQUIRES_X86_FMA3;
20242 for (uint32_t n = 9; n < 16; n++) {
20243 for (size_t k = 1; k <= 5; k += 2) {
20244 GemmMicrokernelTester()
20245 .mr(4)
20246 .nr(8)
20247 .kr(1)
20248 .sr(1)
20249 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020250 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020251 .k(k)
20252 .cn_stride(11)
20253 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20254 }
20255 }
20256 }
20257
20258 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_subtile) {
20259 TEST_REQUIRES_X86_FMA3;
20260 for (uint32_t n = 9; n < 16; n++) {
20261 for (size_t k = 1; k <= 5; k += 2) {
20262 for (uint32_t m = 1; m <= 4; m++) {
20263 GemmMicrokernelTester()
20264 .mr(4)
20265 .nr(8)
20266 .kr(1)
20267 .sr(1)
20268 .m(m)
20269 .n(n)
20270 .k(k)
20271 .iterations(1)
20272 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20273 }
20274 }
20275 }
20276 }
20277
20278 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8) {
20279 TEST_REQUIRES_X86_FMA3;
20280 for (uint32_t n = 16; n <= 24; n += 8) {
20281 for (size_t k = 1; k <= 5; k += 2) {
20282 GemmMicrokernelTester()
20283 .mr(4)
20284 .nr(8)
20285 .kr(1)
20286 .sr(1)
20287 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020288 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020289 .k(k)
20290 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20291 }
20292 }
20293 }
20294
20295 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8_strided_cn) {
20296 TEST_REQUIRES_X86_FMA3;
20297 for (uint32_t n = 16; n <= 24; n += 8) {
20298 for (size_t k = 1; k <= 5; k += 2) {
20299 GemmMicrokernelTester()
20300 .mr(4)
20301 .nr(8)
20302 .kr(1)
20303 .sr(1)
20304 .m(4)
20305 .n(n)
20306 .k(k)
20307 .cn_stride(11)
20308 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20309 }
20310 }
20311 }
20312
20313 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8_subtile) {
20314 TEST_REQUIRES_X86_FMA3;
20315 for (uint32_t n = 16; n <= 24; n += 8) {
20316 for (size_t k = 1; k <= 5; k += 2) {
20317 for (uint32_t m = 1; m <= 4; m++) {
20318 GemmMicrokernelTester()
20319 .mr(4)
20320 .nr(8)
20321 .kr(1)
20322 .sr(1)
20323 .m(m)
20324 .n(n)
20325 .k(k)
20326 .iterations(1)
20327 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20328 }
20329 }
20330 }
20331 }
20332
20333 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, small_kernel) {
20334 TEST_REQUIRES_X86_FMA3;
20335 for (size_t k = 1; k <= 5; k += 2) {
20336 GemmMicrokernelTester()
20337 .mr(4)
20338 .nr(8)
20339 .kr(1)
20340 .sr(1)
20341 .m(4)
20342 .n(8)
20343 .k(k)
20344 .ks(3)
20345 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20346 }
20347 }
20348
20349 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, small_kernel_subtile) {
20350 TEST_REQUIRES_X86_FMA3;
20351 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020352 for (uint32_t n = 1; n <= 8; n++) {
20353 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020354 GemmMicrokernelTester()
20355 .mr(4)
20356 .nr(8)
20357 .kr(1)
20358 .sr(1)
20359 .m(m)
20360 .n(n)
20361 .k(k)
20362 .ks(3)
20363 .iterations(1)
20364 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20365 }
20366 }
20367 }
20368 }
20369
20370 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
20371 TEST_REQUIRES_X86_FMA3;
20372 for (uint32_t n = 9; n < 16; n++) {
20373 for (size_t k = 1; k <= 5; k += 2) {
20374 GemmMicrokernelTester()
20375 .mr(4)
20376 .nr(8)
20377 .kr(1)
20378 .sr(1)
20379 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020380 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020381 .k(k)
20382 .ks(3)
20383 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20384 }
20385 }
20386 }
20387
20388 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, n_div_8_small_kernel) {
20389 TEST_REQUIRES_X86_FMA3;
20390 for (uint32_t n = 16; n <= 24; n += 8) {
20391 for (size_t k = 1; k <= 5; k += 2) {
20392 GemmMicrokernelTester()
20393 .mr(4)
20394 .nr(8)
20395 .kr(1)
20396 .sr(1)
20397 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020398 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020399 .k(k)
20400 .ks(3)
20401 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20402 }
20403 }
20404 }
20405
20406 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, strided_cm_subtile) {
20407 TEST_REQUIRES_X86_FMA3;
20408 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020409 for (uint32_t n = 1; n <= 8; n++) {
20410 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020411 GemmMicrokernelTester()
20412 .mr(4)
20413 .nr(8)
20414 .kr(1)
20415 .sr(1)
20416 .m(m)
20417 .n(n)
20418 .k(k)
20419 .cm_stride(11)
20420 .iterations(1)
20421 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20422 }
20423 }
20424 }
20425 }
20426
20427 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, a_offset) {
20428 TEST_REQUIRES_X86_FMA3;
20429 for (size_t k = 1; k <= 5; k += 2) {
20430 GemmMicrokernelTester()
20431 .mr(4)
20432 .nr(8)
20433 .kr(1)
20434 .sr(1)
20435 .m(4)
20436 .n(8)
20437 .k(k)
20438 .ks(3)
20439 .a_offset(23)
20440 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20441 }
20442 }
20443
20444 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, zero) {
20445 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020446 for (size_t k = 1; k <= 5; k += 2) {
20447 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020448 GemmMicrokernelTester()
20449 .mr(4)
20450 .nr(8)
20451 .kr(1)
20452 .sr(1)
20453 .m(4)
20454 .n(8)
20455 .k(k)
20456 .ks(3)
20457 .a_offset(23)
20458 .zero_index(mz)
20459 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20460 }
20461 }
20462 }
20463
20464 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, qmin) {
20465 TEST_REQUIRES_X86_FMA3;
20466 GemmMicrokernelTester()
20467 .mr(4)
20468 .nr(8)
20469 .kr(1)
20470 .sr(1)
20471 .m(4)
20472 .n(8)
20473 .k(1)
20474 .qmin(128)
20475 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20476 }
20477
20478 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, qmax) {
20479 TEST_REQUIRES_X86_FMA3;
20480 GemmMicrokernelTester()
20481 .mr(4)
20482 .nr(8)
20483 .kr(1)
20484 .sr(1)
20485 .m(4)
20486 .n(8)
20487 .k(1)
20488 .qmax(128)
20489 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20490 }
20491
20492 TEST(F32_IGEMM_MINMAX_4X8__FMA3_BROADCAST, strided_cm) {
20493 TEST_REQUIRES_X86_FMA3;
20494 GemmMicrokernelTester()
20495 .mr(4)
20496 .nr(8)
20497 .kr(1)
20498 .sr(1)
20499 .m(4)
20500 .n(8)
20501 .k(1)
20502 .cm_stride(11)
20503 .Test(xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
20504 }
20505#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20506
20507
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast
  // (mr = 5, nr = 8, kr = 1, sr = 1). Every test first checks
  // TEST_REQUIRES_X86_FMA3.

  // Full 5x8 tile, k = 1.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // Full 5x8 tile, k = 1, cn_stride = 11.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, strided_cn) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(1)
      .cn_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // k = 1 over every m in [1, 5] and n in [1, 8]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // k = 1, n = 8, over every m in [1, 5]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile_m) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t m = 1; m <= 5; m++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(8)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // k = 1, m = 5, over every n in [1, 8]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_eq_1_subtile_n) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // Full 5x8 tile for every k in [2, 9].
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_gt_1) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // Every k in [2, 9] crossed with every m in [1, 5] and n in [1, 8];
  // one iteration per shape.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, k_gt_1_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 2; k < 10; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // m = 5, n in [9, 15], k in {1, 3, 5}.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // m = 5, n in [9, 15], k in {1, 3, 5}, cn_stride = 11.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_strided_cn) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // n in [9, 15], k in {1, 3, 5}, every m in [1, 5]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // m = 5, n in {16, 24}, k in {1, 3, 5}.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // m = 5, n in {16, 24}, k in {1, 3, 5}, cn_stride = 11.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8_strided_cn) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // n in {16, 24}, k in {1, 3, 5}, every m in [1, 5]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // Full 5x8 tile, k in {1, 3, 5}, ks = 3.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, small_kernel) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // k in {1, 3, 5}, ks = 3, every m in [1, 5] and n in [1, 8];
  // one iteration per shape.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, small_kernel_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // m = 5, n in [9, 15], k in {1, 3, 5}, ks = 3.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_gt_8_small_kernel) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // m = 5, n in {16, 24}, k in {1, 3, 5}, ks = 3.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, n_div_8_small_kernel) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(n)
          .k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // k in {1, 3, 5}, cm_stride = 11, every m in [1, 5] and n in [1, 8];
  // one iteration per shape.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, strided_cm_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 5; m++) {
          GemmMicrokernelTester()
            .mr(5)
            .nr(8)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // Full 5x8 tile, k in {1, 3, 5}, ks = 3, a_offset = 29.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, a_offset) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(29)
        .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // Full 5x8 tile, k in {1, 3, 5}, ks = 3, a_offset = 29, with zero_index
  // swept over [0, 4].
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, zero) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t mz = 0; mz < 5; mz++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(5)
          .n(8)
          .k(k)
          .ks(3)
          .a_offset(29)
          .zero_index(mz)
          .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // Full 5x8 tile, k = 1, qmin = 128.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, qmin) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // Full 5x8 tile, k = 1, qmax = 128.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, qmax) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // Full 5x8 tile, k = 1, cm_stride = 11.
  TEST(F32_IGEMM_MINMAX_5X8__FMA3_BROADCAST, strided_cm) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20904
20905
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast
  // (mr = 1, nr = 16, kr = 1, sr = 1). Every test first checks
  // TEST_REQUIRES_X86_FMA3.

  // Full 1x16 tile, k = 1.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // Full 1x16 tile, k = 1, cn_stride = 19.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, strided_cn) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(1)
      .cn_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // k = 1 over m = 1 and every n in [1, 16]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // k = 1, n = 16; the m loop covers only m = 1; one iteration.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(16)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // k = 1, m = 1, over every n in [1, 16]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 1; n <= 16; n++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // Full 1x16 tile for every k in [2, 9].
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_gt_1) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(16)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // Every k in [2, 9] crossed with m = 1 and every n in [1, 16];
  // one iteration per shape.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, k_gt_1_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 2; k < 10; k++) {
      for (uint32_t n = 1; n <= 16; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(16)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // m = 1, n in [17, 31], k in {1, 3, 5}.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // m = 1, n in [17, 31], k in {1, 3, 5}, cn_stride = 19.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // n in [17, 31], k in {1, 3, 5}, m = 1; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(16)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // m = 1, n in {32, 48}, k in {1, 3, 5}.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // m = 1, n in {32, 48}, k in {1, 3, 5}, cn_stride = 19.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16_strided_cn) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // n in {32, 48}, k in {1, 3, 5}, m = 1; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(16)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // Full 1x16 tile, k in {1, 3, 5}, ks = 3.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, small_kernel) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(16)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // k in {1, 3, 5}, ks = 3, m = 1 and every n in [1, 16];
  // one iteration per shape.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, small_kernel_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 16; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(16)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // m = 1, n in [17, 31], k in {1, 3, 5}, ks = 3.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_gt_16_small_kernel) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // m = 1, n in {32, 48}, k in {1, 3, 5}, ks = 3.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, n_div_16_small_kernel) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(1)
          .n(n)
          .k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // k in {1, 3, 5}, cm_stride = 19, m = 1 and every n in [1, 16];
  // one iteration per shape.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, strided_cm_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 16; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1)
            .nr(16)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // Full 1x16 tile, k in {1, 3, 5}, ks = 3, a_offset = 7.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, a_offset) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(7)
        .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // Full 1x16 tile, k in {1, 3, 5}, ks = 3, a_offset = 7; the zero_index loop
  // covers only mz = 0.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, zero) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t mz = 0; mz < 1; mz++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(1)
          .n(16)
          .k(k)
          .ks(3)
          .a_offset(7)
          .zero_index(mz)
          .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // Full 1x16 tile, k = 1, qmin = 128.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, qmin) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // Full 1x16 tile, k = 1, qmax = 128.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, qmax) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // Full 1x16 tile, k = 1, cm_stride = 19.
  TEST(F32_IGEMM_MINMAX_1X16__FMA3_BROADCAST, strided_cm) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(1)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21302
21303
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast
  // (mr = 3, nr = 16, kr = 1, sr = 1). Every test first checks
  // TEST_REQUIRES_X86_FMA3.

  // Full 3x16 tile, k = 1.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(1)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // Full 3x16 tile, k = 1, cn_stride = 19.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, strided_cn) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(1)
      .cn_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // k = 1 over every m in [1, 3] and n in [1, 16]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // k = 1, n = 16, over every m in [1, 3]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile_m) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(16)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // k = 1, m = 3, over every n in [1, 16]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_eq_1_subtile_n) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 1; n <= 16; n++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // Full 3x16 tile for every k in [2, 9].
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_gt_1) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 2; k < 10; k++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(16)
        .k(k)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // Every k in [2, 9] crossed with every m in [1, 3] and n in [1, 16];
  // one iteration per shape.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, k_gt_1_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 2; k < 10; k++) {
      for (uint32_t n = 1; n <= 16; n++) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(16)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // m = 3, n in [17, 31], k in {1, 3, 5}.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(3)
          .n(n)
          .k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // m = 3, n in [17, 31], k in {1, 3, 5}, cn_stride = 19.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_strided_cn) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(3)
          .n(n)
          .k(k)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // n in [17, 31], k in {1, 3, 5}, every m in [1, 3]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(16)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // m = 3, n in {32, 48}, k in {1, 3, 5}.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(3)
          .n(n)
          .k(k)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // m = 3, n in {32, 48}, k in {1, 3, 5}, cn_stride = 19.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16_strided_cn) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(3)
          .n(n)
          .k(k)
          .cn_stride(19)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // n in {32, 48}, k in {1, 3, 5}, every m in [1, 3]; one iteration per shape.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 5; k += 2) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(16)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // Full 3x16 tile, k in {1, 3, 5}, ks = 3.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, small_kernel) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(16)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // k in {1, 3, 5}, ks = 3, every m in [1, 3] and n in [1, 16];
  // one iteration per shape.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, small_kernel_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 16; n++) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(16)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // m = 3, n in [17, 31], k in {1, 3, 5}, ks = 3.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_gt_16_small_kernel) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(3)
          .n(n)
          .k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // m = 3, n in {32, 48}, k in {1, 3, 5}, ks = 3.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, n_div_16_small_kernel) {
    TEST_REQUIRES_X86_FMA3;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 5; k += 2) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(3)
          .n(n)
          .k(k)
          .ks(3)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // k in {1, 3, 5}, cm_stride = 19, every m in [1, 3] and n in [1, 16];
  // one iteration per shape.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, strided_cm_subtile) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t n = 1; n <= 16; n++) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(16)
            .kr(1)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
        }
      }
    }
  }

  // Full 3x16 tile, k in {1, 3, 5}, ks = 3, a_offset = 17.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, a_offset) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(17)
        .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
    }
  }

  // Full 3x16 tile, k in {1, 3, 5}, ks = 3, a_offset = 17, with zero_index
  // swept over [0, 2].
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, zero) {
    TEST_REQUIRES_X86_FMA3;
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t mz = 0; mz < 3; mz++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(3)
          .n(16)
          .k(k)
          .ks(3)
          .a_offset(17)
          .zero_index(mz)
          .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
      }
    }
  }

  // Full 3x16 tile, k = 1, qmin = 128.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, qmin) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(1)
      .qmin(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // Full 3x16 tile, k = 1, qmax = 128.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, qmax) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(1)
      .qmax(128)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }

  // Full 3x16 tile, k = 1, cm_stride = 19.
  TEST(F32_IGEMM_MINMAX_3X16__FMA3_BROADCAST, strided_cm) {
    TEST_REQUIRES_X86_FMA3;
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(1)
      .cm_stride(19)
      .Test(xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast, xnn_init_f32_minmax_avx_params);
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21700
21701
21702#if XNN_ARCH_X86 || XNN_ARCH_X86_64
21703 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4) {
21704 TEST_REQUIRES_X86_FMA3;
21705 GemmMicrokernelTester()
21706 .mr(1)
21707 .nr(16)
21708 .kr(1)
21709 .sr(4)
21710 .m(1)
21711 .n(16)
21712 .k(4)
21713 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21714 }
21715
21716 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, strided_cn) {
21717 TEST_REQUIRES_X86_FMA3;
21718 GemmMicrokernelTester()
21719 .mr(1)
21720 .nr(16)
21721 .kr(1)
21722 .sr(4)
21723 .m(1)
21724 .n(16)
21725 .k(4)
21726 .cn_stride(19)
21727 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21728 }
21729
21730 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
21731 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021732 for (uint32_t n = 1; n <= 16; n++) {
21733 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021734 GemmMicrokernelTester()
21735 .mr(1)
21736 .nr(16)
21737 .kr(1)
21738 .sr(4)
21739 .m(m)
21740 .n(n)
21741 .k(4)
21742 .iterations(1)
21743 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21744 }
21745 }
21746 }
21747
21748 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
21749 TEST_REQUIRES_X86_FMA3;
21750 for (uint32_t m = 1; m <= 1; m++) {
21751 GemmMicrokernelTester()
21752 .mr(1)
21753 .nr(16)
21754 .kr(1)
21755 .sr(4)
21756 .m(m)
21757 .n(16)
21758 .k(4)
21759 .iterations(1)
21760 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21761 }
21762 }
21763
21764 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
21765 TEST_REQUIRES_X86_FMA3;
21766 for (uint32_t n = 1; n <= 16; n++) {
21767 GemmMicrokernelTester()
21768 .mr(1)
21769 .nr(16)
21770 .kr(1)
21771 .sr(4)
21772 .m(1)
21773 .n(n)
21774 .k(4)
21775 .iterations(1)
21776 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21777 }
21778 }
21779
21780 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_lt_4) {
21781 TEST_REQUIRES_X86_FMA3;
21782 for (size_t k = 1; k < 4; k++) {
21783 GemmMicrokernelTester()
21784 .mr(1)
21785 .nr(16)
21786 .kr(1)
21787 .sr(4)
21788 .m(1)
21789 .n(16)
21790 .k(k)
21791 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21792 }
21793 }
21794
21795 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
21796 TEST_REQUIRES_X86_FMA3;
21797 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021798 for (uint32_t n = 1; n <= 16; n++) {
21799 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021800 GemmMicrokernelTester()
21801 .mr(1)
21802 .nr(16)
21803 .kr(1)
21804 .sr(4)
21805 .m(m)
21806 .n(n)
21807 .k(k)
21808 .iterations(1)
21809 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21810 }
21811 }
21812 }
21813 }
21814
21815 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_gt_4) {
21816 TEST_REQUIRES_X86_FMA3;
21817 for (size_t k = 5; k < 8; k++) {
21818 GemmMicrokernelTester()
21819 .mr(1)
21820 .nr(16)
21821 .kr(1)
21822 .sr(4)
21823 .m(1)
21824 .n(16)
21825 .k(k)
21826 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21827 }
21828 }
21829
21830 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
21831 TEST_REQUIRES_X86_FMA3;
21832 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021833 for (uint32_t n = 1; n <= 16; n++) {
21834 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021835 GemmMicrokernelTester()
21836 .mr(1)
21837 .nr(16)
21838 .kr(1)
21839 .sr(4)
21840 .m(m)
21841 .n(n)
21842 .k(k)
21843 .iterations(1)
21844 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21845 }
21846 }
21847 }
21848 }
21849
21850 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_div_4) {
21851 TEST_REQUIRES_X86_FMA3;
21852 for (size_t k = 8; k <= 40; k += 4) {
21853 GemmMicrokernelTester()
21854 .mr(1)
21855 .nr(16)
21856 .kr(1)
21857 .sr(4)
21858 .m(1)
21859 .n(16)
21860 .k(k)
21861 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21862 }
21863 }
21864
21865 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, k_div_4_subtile) {
21866 TEST_REQUIRES_X86_FMA3;
21867 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021868 for (uint32_t n = 1; n <= 16; n++) {
21869 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021870 GemmMicrokernelTester()
21871 .mr(1)
21872 .nr(16)
21873 .kr(1)
21874 .sr(4)
21875 .m(m)
21876 .n(n)
21877 .k(k)
21878 .iterations(1)
21879 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21880 }
21881 }
21882 }
21883 }
21884
21885 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16) {
21886 TEST_REQUIRES_X86_FMA3;
21887 for (uint32_t n = 17; n < 32; n++) {
21888 for (size_t k = 1; k <= 20; k += 5) {
21889 GemmMicrokernelTester()
21890 .mr(1)
21891 .nr(16)
21892 .kr(1)
21893 .sr(4)
21894 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021895 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021896 .k(k)
21897 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21898 }
21899 }
21900 }
21901
21902 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
21903 TEST_REQUIRES_X86_FMA3;
21904 for (uint32_t n = 17; n < 32; n++) {
21905 for (size_t k = 1; k <= 20; k += 5) {
21906 GemmMicrokernelTester()
21907 .mr(1)
21908 .nr(16)
21909 .kr(1)
21910 .sr(4)
21911 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021912 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021913 .k(k)
21914 .cn_stride(19)
21915 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21916 }
21917 }
21918 }
21919
21920 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
21921 TEST_REQUIRES_X86_FMA3;
21922 for (uint32_t n = 17; n < 32; n++) {
21923 for (size_t k = 1; k <= 20; k += 5) {
21924 for (uint32_t m = 1; m <= 1; m++) {
21925 GemmMicrokernelTester()
21926 .mr(1)
21927 .nr(16)
21928 .kr(1)
21929 .sr(4)
21930 .m(m)
21931 .n(n)
21932 .k(k)
21933 .iterations(1)
21934 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21935 }
21936 }
21937 }
21938 }
21939
21940 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16) {
21941 TEST_REQUIRES_X86_FMA3;
21942 for (uint32_t n = 32; n <= 48; n += 16) {
21943 for (size_t k = 1; k <= 20; k += 5) {
21944 GemmMicrokernelTester()
21945 .mr(1)
21946 .nr(16)
21947 .kr(1)
21948 .sr(4)
21949 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021950 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021951 .k(k)
21952 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21953 }
21954 }
21955 }
21956
21957 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
21958 TEST_REQUIRES_X86_FMA3;
21959 for (uint32_t n = 32; n <= 48; n += 16) {
21960 for (size_t k = 1; k <= 20; k += 5) {
21961 GemmMicrokernelTester()
21962 .mr(1)
21963 .nr(16)
21964 .kr(1)
21965 .sr(4)
21966 .m(1)
21967 .n(n)
21968 .k(k)
21969 .cn_stride(19)
21970 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21971 }
21972 }
21973 }
21974
21975 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_subtile) {
21976 TEST_REQUIRES_X86_FMA3;
21977 for (uint32_t n = 32; n <= 48; n += 16) {
21978 for (size_t k = 1; k <= 20; k += 5) {
21979 for (uint32_t m = 1; m <= 1; m++) {
21980 GemmMicrokernelTester()
21981 .mr(1)
21982 .nr(16)
21983 .kr(1)
21984 .sr(4)
21985 .m(m)
21986 .n(n)
21987 .k(k)
21988 .iterations(1)
21989 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
21990 }
21991 }
21992 }
21993 }
21994
21995 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, small_kernel) {
21996 TEST_REQUIRES_X86_FMA3;
21997 for (size_t k = 1; k <= 20; k += 5) {
21998 GemmMicrokernelTester()
21999 .mr(1)
22000 .nr(16)
22001 .kr(1)
22002 .sr(4)
22003 .m(1)
22004 .n(16)
22005 .k(k)
22006 .ks(3)
22007 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22008 }
22009 }
22010
22011 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, small_kernel_subtile) {
22012 TEST_REQUIRES_X86_FMA3;
22013 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022014 for (uint32_t n = 1; n <= 16; n++) {
22015 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022016 GemmMicrokernelTester()
22017 .mr(1)
22018 .nr(16)
22019 .kr(1)
22020 .sr(4)
22021 .m(m)
22022 .n(n)
22023 .k(k)
22024 .ks(3)
22025 .iterations(1)
22026 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22027 }
22028 }
22029 }
22030 }
22031
22032 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_gt_16_small_kernel) {
22033 TEST_REQUIRES_X86_FMA3;
22034 for (uint32_t n = 17; n < 32; n++) {
22035 for (size_t k = 1; k <= 20; k += 5) {
22036 GemmMicrokernelTester()
22037 .mr(1)
22038 .nr(16)
22039 .kr(1)
22040 .sr(4)
22041 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022042 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022043 .k(k)
22044 .ks(3)
22045 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22046 }
22047 }
22048 }
22049
22050 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, n_div_16_small_kernel) {
22051 TEST_REQUIRES_X86_FMA3;
22052 for (uint32_t n = 32; n <= 48; n += 16) {
22053 for (size_t k = 1; k <= 20; k += 5) {
22054 GemmMicrokernelTester()
22055 .mr(1)
22056 .nr(16)
22057 .kr(1)
22058 .sr(4)
22059 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022060 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022061 .k(k)
22062 .ks(3)
22063 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22064 }
22065 }
22066 }
22067
22068 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, strided_cm_subtile) {
22069 TEST_REQUIRES_X86_FMA3;
22070 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022071 for (uint32_t n = 1; n <= 16; n++) {
22072 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022073 GemmMicrokernelTester()
22074 .mr(1)
22075 .nr(16)
22076 .kr(1)
22077 .sr(4)
22078 .m(m)
22079 .n(n)
22080 .k(k)
22081 .cm_stride(19)
22082 .iterations(1)
22083 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22084 }
22085 }
22086 }
22087 }
22088
22089 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, a_offset) {
22090 TEST_REQUIRES_X86_FMA3;
22091 for (size_t k = 1; k <= 20; k += 5) {
22092 GemmMicrokernelTester()
22093 .mr(1)
22094 .nr(16)
22095 .kr(1)
22096 .sr(4)
22097 .m(1)
22098 .n(16)
22099 .k(k)
22100 .ks(3)
22101 .a_offset(23)
22102 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22103 }
22104 }
22105
22106 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, zero) {
22107 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022108 for (size_t k = 1; k <= 20; k += 5) {
22109 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022110 GemmMicrokernelTester()
22111 .mr(1)
22112 .nr(16)
22113 .kr(1)
22114 .sr(4)
22115 .m(1)
22116 .n(16)
22117 .k(k)
22118 .ks(3)
22119 .a_offset(23)
22120 .zero_index(mz)
22121 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22122 }
22123 }
22124 }
22125
22126 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, qmin) {
22127 TEST_REQUIRES_X86_FMA3;
22128 GemmMicrokernelTester()
22129 .mr(1)
22130 .nr(16)
22131 .kr(1)
22132 .sr(4)
22133 .m(1)
22134 .n(16)
22135 .k(4)
22136 .qmin(128)
22137 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22138 }
22139
22140 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, qmax) {
22141 TEST_REQUIRES_X86_FMA3;
22142 GemmMicrokernelTester()
22143 .mr(1)
22144 .nr(16)
22145 .kr(1)
22146 .sr(4)
22147 .m(1)
22148 .n(16)
22149 .k(4)
22150 .qmax(128)
22151 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22152 }
22153
22154 TEST(F32_IGEMM_MINMAX_1X16S4__FMA3_BROADCAST, strided_cm) {
22155 TEST_REQUIRES_X86_FMA3;
22156 GemmMicrokernelTester()
22157 .mr(1)
22158 .nr(16)
22159 .kr(1)
22160 .sr(4)
22161 .m(1)
22162 .n(16)
22163 .k(4)
22164 .cm_stride(19)
22165 .Test(xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22166 }
22167#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22168
22169
22170#if XNN_ARCH_X86 || XNN_ARCH_X86_64
22171 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4) {
22172 TEST_REQUIRES_X86_FMA3;
22173 GemmMicrokernelTester()
22174 .mr(3)
22175 .nr(16)
22176 .kr(1)
22177 .sr(4)
22178 .m(3)
22179 .n(16)
22180 .k(4)
22181 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22182 }
22183
22184 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, strided_cn) {
22185 TEST_REQUIRES_X86_FMA3;
22186 GemmMicrokernelTester()
22187 .mr(3)
22188 .nr(16)
22189 .kr(1)
22190 .sr(4)
22191 .m(3)
22192 .n(16)
22193 .k(4)
22194 .cn_stride(19)
22195 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22196 }
22197
22198 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
22199 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022200 for (uint32_t n = 1; n <= 16; n++) {
22201 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022202 GemmMicrokernelTester()
22203 .mr(3)
22204 .nr(16)
22205 .kr(1)
22206 .sr(4)
22207 .m(m)
22208 .n(n)
22209 .k(4)
22210 .iterations(1)
22211 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22212 }
22213 }
22214 }
22215
22216 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
22217 TEST_REQUIRES_X86_FMA3;
22218 for (uint32_t m = 1; m <= 3; m++) {
22219 GemmMicrokernelTester()
22220 .mr(3)
22221 .nr(16)
22222 .kr(1)
22223 .sr(4)
22224 .m(m)
22225 .n(16)
22226 .k(4)
22227 .iterations(1)
22228 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22229 }
22230 }
22231
22232 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
22233 TEST_REQUIRES_X86_FMA3;
22234 for (uint32_t n = 1; n <= 16; n++) {
22235 GemmMicrokernelTester()
22236 .mr(3)
22237 .nr(16)
22238 .kr(1)
22239 .sr(4)
22240 .m(3)
22241 .n(n)
22242 .k(4)
22243 .iterations(1)
22244 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22245 }
22246 }
22247
22248 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_lt_4) {
22249 TEST_REQUIRES_X86_FMA3;
22250 for (size_t k = 1; k < 4; k++) {
22251 GemmMicrokernelTester()
22252 .mr(3)
22253 .nr(16)
22254 .kr(1)
22255 .sr(4)
22256 .m(3)
22257 .n(16)
22258 .k(k)
22259 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22260 }
22261 }
22262
22263 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
22264 TEST_REQUIRES_X86_FMA3;
22265 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022266 for (uint32_t n = 1; n <= 16; n++) {
22267 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022268 GemmMicrokernelTester()
22269 .mr(3)
22270 .nr(16)
22271 .kr(1)
22272 .sr(4)
22273 .m(m)
22274 .n(n)
22275 .k(k)
22276 .iterations(1)
22277 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22278 }
22279 }
22280 }
22281 }
22282
22283 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_gt_4) {
22284 TEST_REQUIRES_X86_FMA3;
22285 for (size_t k = 5; k < 8; k++) {
22286 GemmMicrokernelTester()
22287 .mr(3)
22288 .nr(16)
22289 .kr(1)
22290 .sr(4)
22291 .m(3)
22292 .n(16)
22293 .k(k)
22294 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22295 }
22296 }
22297
22298 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
22299 TEST_REQUIRES_X86_FMA3;
22300 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022301 for (uint32_t n = 1; n <= 16; n++) {
22302 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022303 GemmMicrokernelTester()
22304 .mr(3)
22305 .nr(16)
22306 .kr(1)
22307 .sr(4)
22308 .m(m)
22309 .n(n)
22310 .k(k)
22311 .iterations(1)
22312 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22313 }
22314 }
22315 }
22316 }
22317
22318 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_div_4) {
22319 TEST_REQUIRES_X86_FMA3;
22320 for (size_t k = 8; k <= 40; k += 4) {
22321 GemmMicrokernelTester()
22322 .mr(3)
22323 .nr(16)
22324 .kr(1)
22325 .sr(4)
22326 .m(3)
22327 .n(16)
22328 .k(k)
22329 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22330 }
22331 }
22332
22333 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, k_div_4_subtile) {
22334 TEST_REQUIRES_X86_FMA3;
22335 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022336 for (uint32_t n = 1; n <= 16; n++) {
22337 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022338 GemmMicrokernelTester()
22339 .mr(3)
22340 .nr(16)
22341 .kr(1)
22342 .sr(4)
22343 .m(m)
22344 .n(n)
22345 .k(k)
22346 .iterations(1)
22347 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22348 }
22349 }
22350 }
22351 }
22352
22353 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16) {
22354 TEST_REQUIRES_X86_FMA3;
22355 for (uint32_t n = 17; n < 32; n++) {
22356 for (size_t k = 1; k <= 20; k += 5) {
22357 GemmMicrokernelTester()
22358 .mr(3)
22359 .nr(16)
22360 .kr(1)
22361 .sr(4)
22362 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022363 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022364 .k(k)
22365 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22366 }
22367 }
22368 }
22369
22370 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
22371 TEST_REQUIRES_X86_FMA3;
22372 for (uint32_t n = 17; n < 32; n++) {
22373 for (size_t k = 1; k <= 20; k += 5) {
22374 GemmMicrokernelTester()
22375 .mr(3)
22376 .nr(16)
22377 .kr(1)
22378 .sr(4)
22379 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022380 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022381 .k(k)
22382 .cn_stride(19)
22383 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22384 }
22385 }
22386 }
22387
22388 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
22389 TEST_REQUIRES_X86_FMA3;
22390 for (uint32_t n = 17; n < 32; n++) {
22391 for (size_t k = 1; k <= 20; k += 5) {
22392 for (uint32_t m = 1; m <= 3; m++) {
22393 GemmMicrokernelTester()
22394 .mr(3)
22395 .nr(16)
22396 .kr(1)
22397 .sr(4)
22398 .m(m)
22399 .n(n)
22400 .k(k)
22401 .iterations(1)
22402 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22403 }
22404 }
22405 }
22406 }
22407
22408 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16) {
22409 TEST_REQUIRES_X86_FMA3;
22410 for (uint32_t n = 32; n <= 48; n += 16) {
22411 for (size_t k = 1; k <= 20; k += 5) {
22412 GemmMicrokernelTester()
22413 .mr(3)
22414 .nr(16)
22415 .kr(1)
22416 .sr(4)
22417 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022418 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022419 .k(k)
22420 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22421 }
22422 }
22423 }
22424
22425 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
22426 TEST_REQUIRES_X86_FMA3;
22427 for (uint32_t n = 32; n <= 48; n += 16) {
22428 for (size_t k = 1; k <= 20; k += 5) {
22429 GemmMicrokernelTester()
22430 .mr(3)
22431 .nr(16)
22432 .kr(1)
22433 .sr(4)
22434 .m(3)
22435 .n(n)
22436 .k(k)
22437 .cn_stride(19)
22438 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22439 }
22440 }
22441 }
22442
22443 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_subtile) {
22444 TEST_REQUIRES_X86_FMA3;
22445 for (uint32_t n = 32; n <= 48; n += 16) {
22446 for (size_t k = 1; k <= 20; k += 5) {
22447 for (uint32_t m = 1; m <= 3; m++) {
22448 GemmMicrokernelTester()
22449 .mr(3)
22450 .nr(16)
22451 .kr(1)
22452 .sr(4)
22453 .m(m)
22454 .n(n)
22455 .k(k)
22456 .iterations(1)
22457 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22458 }
22459 }
22460 }
22461 }
22462
22463 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, small_kernel) {
22464 TEST_REQUIRES_X86_FMA3;
22465 for (size_t k = 1; k <= 20; k += 5) {
22466 GemmMicrokernelTester()
22467 .mr(3)
22468 .nr(16)
22469 .kr(1)
22470 .sr(4)
22471 .m(3)
22472 .n(16)
22473 .k(k)
22474 .ks(3)
22475 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22476 }
22477 }
22478
22479 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, small_kernel_subtile) {
22480 TEST_REQUIRES_X86_FMA3;
22481 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022482 for (uint32_t n = 1; n <= 16; n++) {
22483 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022484 GemmMicrokernelTester()
22485 .mr(3)
22486 .nr(16)
22487 .kr(1)
22488 .sr(4)
22489 .m(m)
22490 .n(n)
22491 .k(k)
22492 .ks(3)
22493 .iterations(1)
22494 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22495 }
22496 }
22497 }
22498 }
22499
22500 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_gt_16_small_kernel) {
22501 TEST_REQUIRES_X86_FMA3;
22502 for (uint32_t n = 17; n < 32; n++) {
22503 for (size_t k = 1; k <= 20; k += 5) {
22504 GemmMicrokernelTester()
22505 .mr(3)
22506 .nr(16)
22507 .kr(1)
22508 .sr(4)
22509 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022510 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022511 .k(k)
22512 .ks(3)
22513 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22514 }
22515 }
22516 }
22517
22518 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, n_div_16_small_kernel) {
22519 TEST_REQUIRES_X86_FMA3;
22520 for (uint32_t n = 32; n <= 48; n += 16) {
22521 for (size_t k = 1; k <= 20; k += 5) {
22522 GemmMicrokernelTester()
22523 .mr(3)
22524 .nr(16)
22525 .kr(1)
22526 .sr(4)
22527 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022528 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022529 .k(k)
22530 .ks(3)
22531 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22532 }
22533 }
22534 }
22535
22536 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, strided_cm_subtile) {
22537 TEST_REQUIRES_X86_FMA3;
22538 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022539 for (uint32_t n = 1; n <= 16; n++) {
22540 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022541 GemmMicrokernelTester()
22542 .mr(3)
22543 .nr(16)
22544 .kr(1)
22545 .sr(4)
22546 .m(m)
22547 .n(n)
22548 .k(k)
22549 .cm_stride(19)
22550 .iterations(1)
22551 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22552 }
22553 }
22554 }
22555 }
22556
22557 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, a_offset) {
22558 TEST_REQUIRES_X86_FMA3;
22559 for (size_t k = 1; k <= 20; k += 5) {
22560 GemmMicrokernelTester()
22561 .mr(3)
22562 .nr(16)
22563 .kr(1)
22564 .sr(4)
22565 .m(3)
22566 .n(16)
22567 .k(k)
22568 .ks(3)
22569 .a_offset(67)
22570 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22571 }
22572 }
22573
22574 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, zero) {
22575 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022576 for (size_t k = 1; k <= 20; k += 5) {
22577 for (uint32_t mz = 0; mz < 3; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022578 GemmMicrokernelTester()
22579 .mr(3)
22580 .nr(16)
22581 .kr(1)
22582 .sr(4)
22583 .m(3)
22584 .n(16)
22585 .k(k)
22586 .ks(3)
22587 .a_offset(67)
22588 .zero_index(mz)
22589 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22590 }
22591 }
22592 }
22593
22594 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, qmin) {
22595 TEST_REQUIRES_X86_FMA3;
22596 GemmMicrokernelTester()
22597 .mr(3)
22598 .nr(16)
22599 .kr(1)
22600 .sr(4)
22601 .m(3)
22602 .n(16)
22603 .k(4)
22604 .qmin(128)
22605 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22606 }
22607
22608 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, qmax) {
22609 TEST_REQUIRES_X86_FMA3;
22610 GemmMicrokernelTester()
22611 .mr(3)
22612 .nr(16)
22613 .kr(1)
22614 .sr(4)
22615 .m(3)
22616 .n(16)
22617 .k(4)
22618 .qmax(128)
22619 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22620 }
22621
22622 TEST(F32_IGEMM_MINMAX_3X16S4__FMA3_BROADCAST, strided_cm) {
22623 TEST_REQUIRES_X86_FMA3;
22624 GemmMicrokernelTester()
22625 .mr(3)
22626 .nr(16)
22627 .kr(1)
22628 .sr(4)
22629 .m(3)
22630 .n(16)
22631 .k(4)
22632 .cm_stride(19)
22633 .Test(xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22634 }
22635#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22636
22637
22638#if XNN_ARCH_X86 || XNN_ARCH_X86_64
22639 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4) {
22640 TEST_REQUIRES_X86_FMA3;
22641 GemmMicrokernelTester()
22642 .mr(4)
22643 .nr(16)
22644 .kr(1)
22645 .sr(4)
22646 .m(4)
22647 .n(16)
22648 .k(4)
22649 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22650 }
22651
22652 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, strided_cn) {
22653 TEST_REQUIRES_X86_FMA3;
22654 GemmMicrokernelTester()
22655 .mr(4)
22656 .nr(16)
22657 .kr(1)
22658 .sr(4)
22659 .m(4)
22660 .n(16)
22661 .k(4)
22662 .cn_stride(19)
22663 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22664 }
22665
22666 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
22667 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022668 for (uint32_t n = 1; n <= 16; n++) {
22669 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022670 GemmMicrokernelTester()
22671 .mr(4)
22672 .nr(16)
22673 .kr(1)
22674 .sr(4)
22675 .m(m)
22676 .n(n)
22677 .k(4)
22678 .iterations(1)
22679 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22680 }
22681 }
22682 }
22683
22684 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
22685 TEST_REQUIRES_X86_FMA3;
22686 for (uint32_t m = 1; m <= 4; m++) {
22687 GemmMicrokernelTester()
22688 .mr(4)
22689 .nr(16)
22690 .kr(1)
22691 .sr(4)
22692 .m(m)
22693 .n(16)
22694 .k(4)
22695 .iterations(1)
22696 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22697 }
22698 }
22699
22700 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
22701 TEST_REQUIRES_X86_FMA3;
22702 for (uint32_t n = 1; n <= 16; n++) {
22703 GemmMicrokernelTester()
22704 .mr(4)
22705 .nr(16)
22706 .kr(1)
22707 .sr(4)
22708 .m(4)
22709 .n(n)
22710 .k(4)
22711 .iterations(1)
22712 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22713 }
22714 }
22715
22716 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_lt_4) {
22717 TEST_REQUIRES_X86_FMA3;
22718 for (size_t k = 1; k < 4; k++) {
22719 GemmMicrokernelTester()
22720 .mr(4)
22721 .nr(16)
22722 .kr(1)
22723 .sr(4)
22724 .m(4)
22725 .n(16)
22726 .k(k)
22727 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22728 }
22729 }
22730
22731 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
22732 TEST_REQUIRES_X86_FMA3;
22733 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022734 for (uint32_t n = 1; n <= 16; n++) {
22735 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022736 GemmMicrokernelTester()
22737 .mr(4)
22738 .nr(16)
22739 .kr(1)
22740 .sr(4)
22741 .m(m)
22742 .n(n)
22743 .k(k)
22744 .iterations(1)
22745 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22746 }
22747 }
22748 }
22749 }
22750
22751 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_gt_4) {
22752 TEST_REQUIRES_X86_FMA3;
22753 for (size_t k = 5; k < 8; k++) {
22754 GemmMicrokernelTester()
22755 .mr(4)
22756 .nr(16)
22757 .kr(1)
22758 .sr(4)
22759 .m(4)
22760 .n(16)
22761 .k(k)
22762 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22763 }
22764 }
22765
22766 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
22767 TEST_REQUIRES_X86_FMA3;
22768 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022769 for (uint32_t n = 1; n <= 16; n++) {
22770 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022771 GemmMicrokernelTester()
22772 .mr(4)
22773 .nr(16)
22774 .kr(1)
22775 .sr(4)
22776 .m(m)
22777 .n(n)
22778 .k(k)
22779 .iterations(1)
22780 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22781 }
22782 }
22783 }
22784 }
22785
22786 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_div_4) {
22787 TEST_REQUIRES_X86_FMA3;
22788 for (size_t k = 8; k <= 40; k += 4) {
22789 GemmMicrokernelTester()
22790 .mr(4)
22791 .nr(16)
22792 .kr(1)
22793 .sr(4)
22794 .m(4)
22795 .n(16)
22796 .k(k)
22797 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22798 }
22799 }
22800
22801 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, k_div_4_subtile) {
22802 TEST_REQUIRES_X86_FMA3;
22803 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022804 for (uint32_t n = 1; n <= 16; n++) {
22805 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022806 GemmMicrokernelTester()
22807 .mr(4)
22808 .nr(16)
22809 .kr(1)
22810 .sr(4)
22811 .m(m)
22812 .n(n)
22813 .k(k)
22814 .iterations(1)
22815 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22816 }
22817 }
22818 }
22819 }
22820
22821 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16) {
22822 TEST_REQUIRES_X86_FMA3;
22823 for (uint32_t n = 17; n < 32; n++) {
22824 for (size_t k = 1; k <= 20; k += 5) {
22825 GemmMicrokernelTester()
22826 .mr(4)
22827 .nr(16)
22828 .kr(1)
22829 .sr(4)
22830 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022831 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022832 .k(k)
22833 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22834 }
22835 }
22836 }
22837
22838 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
22839 TEST_REQUIRES_X86_FMA3;
22840 for (uint32_t n = 17; n < 32; n++) {
22841 for (size_t k = 1; k <= 20; k += 5) {
22842 GemmMicrokernelTester()
22843 .mr(4)
22844 .nr(16)
22845 .kr(1)
22846 .sr(4)
22847 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022848 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022849 .k(k)
22850 .cn_stride(19)
22851 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22852 }
22853 }
22854 }
22855
22856 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
22857 TEST_REQUIRES_X86_FMA3;
22858 for (uint32_t n = 17; n < 32; n++) {
22859 for (size_t k = 1; k <= 20; k += 5) {
22860 for (uint32_t m = 1; m <= 4; m++) {
22861 GemmMicrokernelTester()
22862 .mr(4)
22863 .nr(16)
22864 .kr(1)
22865 .sr(4)
22866 .m(m)
22867 .n(n)
22868 .k(k)
22869 .iterations(1)
22870 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22871 }
22872 }
22873 }
22874 }
22875
22876 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16) {
22877 TEST_REQUIRES_X86_FMA3;
22878 for (uint32_t n = 32; n <= 48; n += 16) {
22879 for (size_t k = 1; k <= 20; k += 5) {
22880 GemmMicrokernelTester()
22881 .mr(4)
22882 .nr(16)
22883 .kr(1)
22884 .sr(4)
22885 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022886 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022887 .k(k)
22888 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22889 }
22890 }
22891 }
22892
22893 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
22894 TEST_REQUIRES_X86_FMA3;
22895 for (uint32_t n = 32; n <= 48; n += 16) {
22896 for (size_t k = 1; k <= 20; k += 5) {
22897 GemmMicrokernelTester()
22898 .mr(4)
22899 .nr(16)
22900 .kr(1)
22901 .sr(4)
22902 .m(4)
22903 .n(n)
22904 .k(k)
22905 .cn_stride(19)
22906 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22907 }
22908 }
22909 }
22910
22911 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_subtile) {
22912 TEST_REQUIRES_X86_FMA3;
22913 for (uint32_t n = 32; n <= 48; n += 16) {
22914 for (size_t k = 1; k <= 20; k += 5) {
22915 for (uint32_t m = 1; m <= 4; m++) {
22916 GemmMicrokernelTester()
22917 .mr(4)
22918 .nr(16)
22919 .kr(1)
22920 .sr(4)
22921 .m(m)
22922 .n(n)
22923 .k(k)
22924 .iterations(1)
22925 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22926 }
22927 }
22928 }
22929 }
22930
22931 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, small_kernel) {
22932 TEST_REQUIRES_X86_FMA3;
22933 for (size_t k = 1; k <= 20; k += 5) {
22934 GemmMicrokernelTester()
22935 .mr(4)
22936 .nr(16)
22937 .kr(1)
22938 .sr(4)
22939 .m(4)
22940 .n(16)
22941 .k(k)
22942 .ks(3)
22943 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22944 }
22945 }
22946
22947 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, small_kernel_subtile) {
22948 TEST_REQUIRES_X86_FMA3;
22949 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022950 for (uint32_t n = 1; n <= 16; n++) {
22951 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022952 GemmMicrokernelTester()
22953 .mr(4)
22954 .nr(16)
22955 .kr(1)
22956 .sr(4)
22957 .m(m)
22958 .n(n)
22959 .k(k)
22960 .ks(3)
22961 .iterations(1)
22962 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22963 }
22964 }
22965 }
22966 }
22967
22968 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_gt_16_small_kernel) {
22969 TEST_REQUIRES_X86_FMA3;
22970 for (uint32_t n = 17; n < 32; n++) {
22971 for (size_t k = 1; k <= 20; k += 5) {
22972 GemmMicrokernelTester()
22973 .mr(4)
22974 .nr(16)
22975 .kr(1)
22976 .sr(4)
22977 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022978 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022979 .k(k)
22980 .ks(3)
22981 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
22982 }
22983 }
22984 }
22985
22986 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, n_div_16_small_kernel) {
22987 TEST_REQUIRES_X86_FMA3;
22988 for (uint32_t n = 32; n <= 48; n += 16) {
22989 for (size_t k = 1; k <= 20; k += 5) {
22990 GemmMicrokernelTester()
22991 .mr(4)
22992 .nr(16)
22993 .kr(1)
22994 .sr(4)
22995 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022996 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080022997 .k(k)
22998 .ks(3)
22999 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23000 }
23001 }
23002 }
23003
23004 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, strided_cm_subtile) {
23005 TEST_REQUIRES_X86_FMA3;
23006 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023007 for (uint32_t n = 1; n <= 16; n++) {
23008 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023009 GemmMicrokernelTester()
23010 .mr(4)
23011 .nr(16)
23012 .kr(1)
23013 .sr(4)
23014 .m(m)
23015 .n(n)
23016 .k(k)
23017 .cm_stride(19)
23018 .iterations(1)
23019 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23020 }
23021 }
23022 }
23023 }
23024
23025 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, a_offset) {
23026 TEST_REQUIRES_X86_FMA3;
23027 for (size_t k = 1; k <= 20; k += 5) {
23028 GemmMicrokernelTester()
23029 .mr(4)
23030 .nr(16)
23031 .kr(1)
23032 .sr(4)
23033 .m(4)
23034 .n(16)
23035 .k(k)
23036 .ks(3)
23037 .a_offset(83)
23038 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23039 }
23040 }
23041
23042 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, zero) {
23043 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023044 for (size_t k = 1; k <= 20; k += 5) {
23045 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023046 GemmMicrokernelTester()
23047 .mr(4)
23048 .nr(16)
23049 .kr(1)
23050 .sr(4)
23051 .m(4)
23052 .n(16)
23053 .k(k)
23054 .ks(3)
23055 .a_offset(83)
23056 .zero_index(mz)
23057 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23058 }
23059 }
23060 }
23061
23062 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, qmin) {
23063 TEST_REQUIRES_X86_FMA3;
23064 GemmMicrokernelTester()
23065 .mr(4)
23066 .nr(16)
23067 .kr(1)
23068 .sr(4)
23069 .m(4)
23070 .n(16)
23071 .k(4)
23072 .qmin(128)
23073 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23074 }
23075
23076 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, qmax) {
23077 TEST_REQUIRES_X86_FMA3;
23078 GemmMicrokernelTester()
23079 .mr(4)
23080 .nr(16)
23081 .kr(1)
23082 .sr(4)
23083 .m(4)
23084 .n(16)
23085 .k(4)
23086 .qmax(128)
23087 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23088 }
23089
23090 TEST(F32_IGEMM_MINMAX_4X16S4__FMA3_BROADCAST, strided_cm) {
23091 TEST_REQUIRES_X86_FMA3;
23092 GemmMicrokernelTester()
23093 .mr(4)
23094 .nr(16)
23095 .kr(1)
23096 .sr(4)
23097 .m(4)
23098 .n(16)
23099 .k(4)
23100 .cm_stride(19)
23101 .Test(xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23102 }
23103#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23104
23105
23106#if XNN_ARCH_X86 || XNN_ARCH_X86_64
23107 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4) {
23108 TEST_REQUIRES_X86_FMA3;
23109 GemmMicrokernelTester()
23110 .mr(5)
23111 .nr(16)
23112 .kr(1)
23113 .sr(4)
23114 .m(5)
23115 .n(16)
23116 .k(4)
23117 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23118 }
23119
23120 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, strided_cn) {
23121 TEST_REQUIRES_X86_FMA3;
23122 GemmMicrokernelTester()
23123 .mr(5)
23124 .nr(16)
23125 .kr(1)
23126 .sr(4)
23127 .m(5)
23128 .n(16)
23129 .k(4)
23130 .cn_stride(19)
23131 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23132 }
23133
23134 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile) {
23135 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023136 for (uint32_t n = 1; n <= 16; n++) {
23137 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023138 GemmMicrokernelTester()
23139 .mr(5)
23140 .nr(16)
23141 .kr(1)
23142 .sr(4)
23143 .m(m)
23144 .n(n)
23145 .k(4)
23146 .iterations(1)
23147 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23148 }
23149 }
23150 }
23151
23152 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile_m) {
23153 TEST_REQUIRES_X86_FMA3;
23154 for (uint32_t m = 1; m <= 5; m++) {
23155 GemmMicrokernelTester()
23156 .mr(5)
23157 .nr(16)
23158 .kr(1)
23159 .sr(4)
23160 .m(m)
23161 .n(16)
23162 .k(4)
23163 .iterations(1)
23164 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23165 }
23166 }
23167
23168 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_eq_4_subtile_n) {
23169 TEST_REQUIRES_X86_FMA3;
23170 for (uint32_t n = 1; n <= 16; n++) {
23171 GemmMicrokernelTester()
23172 .mr(5)
23173 .nr(16)
23174 .kr(1)
23175 .sr(4)
23176 .m(5)
23177 .n(n)
23178 .k(4)
23179 .iterations(1)
23180 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23181 }
23182 }
23183
23184 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_lt_4) {
23185 TEST_REQUIRES_X86_FMA3;
23186 for (size_t k = 1; k < 4; k++) {
23187 GemmMicrokernelTester()
23188 .mr(5)
23189 .nr(16)
23190 .kr(1)
23191 .sr(4)
23192 .m(5)
23193 .n(16)
23194 .k(k)
23195 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23196 }
23197 }
23198
23199 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_lt_4_subtile) {
23200 TEST_REQUIRES_X86_FMA3;
23201 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023202 for (uint32_t n = 1; n <= 16; n++) {
23203 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023204 GemmMicrokernelTester()
23205 .mr(5)
23206 .nr(16)
23207 .kr(1)
23208 .sr(4)
23209 .m(m)
23210 .n(n)
23211 .k(k)
23212 .iterations(1)
23213 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23214 }
23215 }
23216 }
23217 }
23218
23219 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_gt_4) {
23220 TEST_REQUIRES_X86_FMA3;
23221 for (size_t k = 5; k < 8; k++) {
23222 GemmMicrokernelTester()
23223 .mr(5)
23224 .nr(16)
23225 .kr(1)
23226 .sr(4)
23227 .m(5)
23228 .n(16)
23229 .k(k)
23230 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23231 }
23232 }
23233
23234 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_gt_4_subtile) {
23235 TEST_REQUIRES_X86_FMA3;
23236 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023237 for (uint32_t n = 1; n <= 16; n++) {
23238 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023239 GemmMicrokernelTester()
23240 .mr(5)
23241 .nr(16)
23242 .kr(1)
23243 .sr(4)
23244 .m(m)
23245 .n(n)
23246 .k(k)
23247 .iterations(1)
23248 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23249 }
23250 }
23251 }
23252 }
23253
23254 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_div_4) {
23255 TEST_REQUIRES_X86_FMA3;
23256 for (size_t k = 8; k <= 40; k += 4) {
23257 GemmMicrokernelTester()
23258 .mr(5)
23259 .nr(16)
23260 .kr(1)
23261 .sr(4)
23262 .m(5)
23263 .n(16)
23264 .k(k)
23265 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23266 }
23267 }
23268
23269 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, k_div_4_subtile) {
23270 TEST_REQUIRES_X86_FMA3;
23271 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023272 for (uint32_t n = 1; n <= 16; n++) {
23273 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023274 GemmMicrokernelTester()
23275 .mr(5)
23276 .nr(16)
23277 .kr(1)
23278 .sr(4)
23279 .m(m)
23280 .n(n)
23281 .k(k)
23282 .iterations(1)
23283 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23284 }
23285 }
23286 }
23287 }
23288
23289 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16) {
23290 TEST_REQUIRES_X86_FMA3;
23291 for (uint32_t n = 17; n < 32; n++) {
23292 for (size_t k = 1; k <= 20; k += 5) {
23293 GemmMicrokernelTester()
23294 .mr(5)
23295 .nr(16)
23296 .kr(1)
23297 .sr(4)
23298 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023299 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023300 .k(k)
23301 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23302 }
23303 }
23304 }
23305
23306 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_strided_cn) {
23307 TEST_REQUIRES_X86_FMA3;
23308 for (uint32_t n = 17; n < 32; n++) {
23309 for (size_t k = 1; k <= 20; k += 5) {
23310 GemmMicrokernelTester()
23311 .mr(5)
23312 .nr(16)
23313 .kr(1)
23314 .sr(4)
23315 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023316 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023317 .k(k)
23318 .cn_stride(19)
23319 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23320 }
23321 }
23322 }
23323
23324 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_subtile) {
23325 TEST_REQUIRES_X86_FMA3;
23326 for (uint32_t n = 17; n < 32; n++) {
23327 for (size_t k = 1; k <= 20; k += 5) {
23328 for (uint32_t m = 1; m <= 5; m++) {
23329 GemmMicrokernelTester()
23330 .mr(5)
23331 .nr(16)
23332 .kr(1)
23333 .sr(4)
23334 .m(m)
23335 .n(n)
23336 .k(k)
23337 .iterations(1)
23338 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23339 }
23340 }
23341 }
23342 }
23343
23344 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16) {
23345 TEST_REQUIRES_X86_FMA3;
23346 for (uint32_t n = 32; n <= 48; n += 16) {
23347 for (size_t k = 1; k <= 20; k += 5) {
23348 GemmMicrokernelTester()
23349 .mr(5)
23350 .nr(16)
23351 .kr(1)
23352 .sr(4)
23353 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023354 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023355 .k(k)
23356 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23357 }
23358 }
23359 }
23360
23361 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_strided_cn) {
23362 TEST_REQUIRES_X86_FMA3;
23363 for (uint32_t n = 32; n <= 48; n += 16) {
23364 for (size_t k = 1; k <= 20; k += 5) {
23365 GemmMicrokernelTester()
23366 .mr(5)
23367 .nr(16)
23368 .kr(1)
23369 .sr(4)
23370 .m(5)
23371 .n(n)
23372 .k(k)
23373 .cn_stride(19)
23374 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23375 }
23376 }
23377 }
23378
23379 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_subtile) {
23380 TEST_REQUIRES_X86_FMA3;
23381 for (uint32_t n = 32; n <= 48; n += 16) {
23382 for (size_t k = 1; k <= 20; k += 5) {
23383 for (uint32_t m = 1; m <= 5; m++) {
23384 GemmMicrokernelTester()
23385 .mr(5)
23386 .nr(16)
23387 .kr(1)
23388 .sr(4)
23389 .m(m)
23390 .n(n)
23391 .k(k)
23392 .iterations(1)
23393 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23394 }
23395 }
23396 }
23397 }
23398
23399 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, small_kernel) {
23400 TEST_REQUIRES_X86_FMA3;
23401 for (size_t k = 1; k <= 20; k += 5) {
23402 GemmMicrokernelTester()
23403 .mr(5)
23404 .nr(16)
23405 .kr(1)
23406 .sr(4)
23407 .m(5)
23408 .n(16)
23409 .k(k)
23410 .ks(3)
23411 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23412 }
23413 }
23414
23415 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, small_kernel_subtile) {
23416 TEST_REQUIRES_X86_FMA3;
23417 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023418 for (uint32_t n = 1; n <= 16; n++) {
23419 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023420 GemmMicrokernelTester()
23421 .mr(5)
23422 .nr(16)
23423 .kr(1)
23424 .sr(4)
23425 .m(m)
23426 .n(n)
23427 .k(k)
23428 .ks(3)
23429 .iterations(1)
23430 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23431 }
23432 }
23433 }
23434 }
23435
23436 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_gt_16_small_kernel) {
23437 TEST_REQUIRES_X86_FMA3;
23438 for (uint32_t n = 17; n < 32; n++) {
23439 for (size_t k = 1; k <= 20; k += 5) {
23440 GemmMicrokernelTester()
23441 .mr(5)
23442 .nr(16)
23443 .kr(1)
23444 .sr(4)
23445 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023446 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023447 .k(k)
23448 .ks(3)
23449 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23450 }
23451 }
23452 }
23453
23454 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, n_div_16_small_kernel) {
23455 TEST_REQUIRES_X86_FMA3;
23456 for (uint32_t n = 32; n <= 48; n += 16) {
23457 for (size_t k = 1; k <= 20; k += 5) {
23458 GemmMicrokernelTester()
23459 .mr(5)
23460 .nr(16)
23461 .kr(1)
23462 .sr(4)
23463 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023464 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023465 .k(k)
23466 .ks(3)
23467 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23468 }
23469 }
23470 }
23471
23472 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, strided_cm_subtile) {
23473 TEST_REQUIRES_X86_FMA3;
23474 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023475 for (uint32_t n = 1; n <= 16; n++) {
23476 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023477 GemmMicrokernelTester()
23478 .mr(5)
23479 .nr(16)
23480 .kr(1)
23481 .sr(4)
23482 .m(m)
23483 .n(n)
23484 .k(k)
23485 .cm_stride(19)
23486 .iterations(1)
23487 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23488 }
23489 }
23490 }
23491 }
23492
23493 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, a_offset) {
23494 TEST_REQUIRES_X86_FMA3;
23495 for (size_t k = 1; k <= 20; k += 5) {
23496 GemmMicrokernelTester()
23497 .mr(5)
23498 .nr(16)
23499 .kr(1)
23500 .sr(4)
23501 .m(5)
23502 .n(16)
23503 .k(k)
23504 .ks(3)
23505 .a_offset(103)
23506 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23507 }
23508 }
23509
23510 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, zero) {
23511 TEST_REQUIRES_X86_FMA3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023512 for (size_t k = 1; k <= 20; k += 5) {
23513 for (uint32_t mz = 0; mz < 5; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023514 GemmMicrokernelTester()
23515 .mr(5)
23516 .nr(16)
23517 .kr(1)
23518 .sr(4)
23519 .m(5)
23520 .n(16)
23521 .k(k)
23522 .ks(3)
23523 .a_offset(103)
23524 .zero_index(mz)
23525 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23526 }
23527 }
23528 }
23529
23530 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, qmin) {
23531 TEST_REQUIRES_X86_FMA3;
23532 GemmMicrokernelTester()
23533 .mr(5)
23534 .nr(16)
23535 .kr(1)
23536 .sr(4)
23537 .m(5)
23538 .n(16)
23539 .k(4)
23540 .qmin(128)
23541 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23542 }
23543
23544 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, qmax) {
23545 TEST_REQUIRES_X86_FMA3;
23546 GemmMicrokernelTester()
23547 .mr(5)
23548 .nr(16)
23549 .kr(1)
23550 .sr(4)
23551 .m(5)
23552 .n(16)
23553 .k(4)
23554 .qmax(128)
23555 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23556 }
23557
23558 TEST(F32_IGEMM_MINMAX_5X16S4__FMA3_BROADCAST, strided_cm) {
23559 TEST_REQUIRES_X86_FMA3;
23560 GemmMicrokernelTester()
23561 .mr(5)
23562 .nr(16)
23563 .kr(1)
23564 .sr(4)
23565 .m(5)
23566 .n(16)
23567 .k(4)
23568 .cm_stride(19)
23569 .Test(xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast, xnn_init_f32_minmax_avx_params);
23570 }
23571#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23572
23573
23574#if XNN_ARCH_X86 || XNN_ARCH_X86_64
23575 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1) {
23576 TEST_REQUIRES_X86_AVX512F;
23577 GemmMicrokernelTester()
23578 .mr(1)
23579 .nr(16)
23580 .kr(1)
23581 .sr(1)
23582 .m(1)
23583 .n(16)
23584 .k(1)
23585 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23586 }
23587
23588 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, strided_cn) {
23589 TEST_REQUIRES_X86_AVX512F;
23590 GemmMicrokernelTester()
23591 .mr(1)
23592 .nr(16)
23593 .kr(1)
23594 .sr(1)
23595 .m(1)
23596 .n(16)
23597 .k(1)
23598 .cn_stride(19)
23599 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23600 }
23601
23602 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile) {
23603 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023604 for (uint32_t n = 1; n <= 16; n++) {
23605 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023606 GemmMicrokernelTester()
23607 .mr(1)
23608 .nr(16)
23609 .kr(1)
23610 .sr(1)
23611 .m(m)
23612 .n(n)
23613 .k(1)
23614 .iterations(1)
23615 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23616 }
23617 }
23618 }
23619
23620 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
23621 TEST_REQUIRES_X86_AVX512F;
23622 for (uint32_t m = 1; m <= 1; m++) {
23623 GemmMicrokernelTester()
23624 .mr(1)
23625 .nr(16)
23626 .kr(1)
23627 .sr(1)
23628 .m(m)
23629 .n(16)
23630 .k(1)
23631 .iterations(1)
23632 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23633 }
23634 }
23635
23636 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
23637 TEST_REQUIRES_X86_AVX512F;
23638 for (uint32_t n = 1; n <= 16; n++) {
23639 GemmMicrokernelTester()
23640 .mr(1)
23641 .nr(16)
23642 .kr(1)
23643 .sr(1)
23644 .m(1)
23645 .n(n)
23646 .k(1)
23647 .iterations(1)
23648 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23649 }
23650 }
23651
23652 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1) {
23653 TEST_REQUIRES_X86_AVX512F;
23654 for (size_t k = 2; k < 10; k++) {
23655 GemmMicrokernelTester()
23656 .mr(1)
23657 .nr(16)
23658 .kr(1)
23659 .sr(1)
23660 .m(1)
23661 .n(16)
23662 .k(k)
23663 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23664 }
23665 }
23666
23667 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, k_gt_1_subtile) {
23668 TEST_REQUIRES_X86_AVX512F;
23669 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023670 for (uint32_t n = 1; n <= 16; n++) {
23671 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023672 GemmMicrokernelTester()
23673 .mr(1)
23674 .nr(16)
23675 .kr(1)
23676 .sr(1)
23677 .m(m)
23678 .n(n)
23679 .k(k)
23680 .iterations(1)
23681 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23682 }
23683 }
23684 }
23685 }
23686
23687 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16) {
23688 TEST_REQUIRES_X86_AVX512F;
23689 for (uint32_t n = 17; n < 32; n++) {
23690 for (size_t k = 1; k <= 5; k += 2) {
23691 GemmMicrokernelTester()
23692 .mr(1)
23693 .nr(16)
23694 .kr(1)
23695 .sr(1)
23696 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023697 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023698 .k(k)
23699 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23700 }
23701 }
23702 }
23703
23704 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
23705 TEST_REQUIRES_X86_AVX512F;
23706 for (uint32_t n = 17; n < 32; n++) {
23707 for (size_t k = 1; k <= 5; k += 2) {
23708 GemmMicrokernelTester()
23709 .mr(1)
23710 .nr(16)
23711 .kr(1)
23712 .sr(1)
23713 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023714 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023715 .k(k)
23716 .cn_stride(19)
23717 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23718 }
23719 }
23720 }
23721
23722 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_subtile) {
23723 TEST_REQUIRES_X86_AVX512F;
23724 for (uint32_t n = 17; n < 32; n++) {
23725 for (size_t k = 1; k <= 5; k += 2) {
23726 for (uint32_t m = 1; m <= 1; m++) {
23727 GemmMicrokernelTester()
23728 .mr(1)
23729 .nr(16)
23730 .kr(1)
23731 .sr(1)
23732 .m(m)
23733 .n(n)
23734 .k(k)
23735 .iterations(1)
23736 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23737 }
23738 }
23739 }
23740 }
23741
23742 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16) {
23743 TEST_REQUIRES_X86_AVX512F;
23744 for (uint32_t n = 32; n <= 48; n += 16) {
23745 for (size_t k = 1; k <= 5; k += 2) {
23746 GemmMicrokernelTester()
23747 .mr(1)
23748 .nr(16)
23749 .kr(1)
23750 .sr(1)
23751 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023752 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023753 .k(k)
23754 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23755 }
23756 }
23757 }
23758
23759 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
23760 TEST_REQUIRES_X86_AVX512F;
23761 for (uint32_t n = 32; n <= 48; n += 16) {
23762 for (size_t k = 1; k <= 5; k += 2) {
23763 GemmMicrokernelTester()
23764 .mr(1)
23765 .nr(16)
23766 .kr(1)
23767 .sr(1)
23768 .m(1)
23769 .n(n)
23770 .k(k)
23771 .cn_stride(19)
23772 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23773 }
23774 }
23775 }
23776
23777 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_subtile) {
23778 TEST_REQUIRES_X86_AVX512F;
23779 for (uint32_t n = 32; n <= 48; n += 16) {
23780 for (size_t k = 1; k <= 5; k += 2) {
23781 for (uint32_t m = 1; m <= 1; m++) {
23782 GemmMicrokernelTester()
23783 .mr(1)
23784 .nr(16)
23785 .kr(1)
23786 .sr(1)
23787 .m(m)
23788 .n(n)
23789 .k(k)
23790 .iterations(1)
23791 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23792 }
23793 }
23794 }
23795 }
23796
23797 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, small_kernel) {
23798 TEST_REQUIRES_X86_AVX512F;
23799 for (size_t k = 1; k <= 5; k += 2) {
23800 GemmMicrokernelTester()
23801 .mr(1)
23802 .nr(16)
23803 .kr(1)
23804 .sr(1)
23805 .m(1)
23806 .n(16)
23807 .k(k)
23808 .ks(3)
23809 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23810 }
23811 }
23812
23813 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, small_kernel_subtile) {
23814 TEST_REQUIRES_X86_AVX512F;
23815 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023816 for (uint32_t n = 1; n <= 16; n++) {
23817 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023818 GemmMicrokernelTester()
23819 .mr(1)
23820 .nr(16)
23821 .kr(1)
23822 .sr(1)
23823 .m(m)
23824 .n(n)
23825 .k(k)
23826 .ks(3)
23827 .iterations(1)
23828 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23829 }
23830 }
23831 }
23832 }
23833
23834 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
23835 TEST_REQUIRES_X86_AVX512F;
23836 for (uint32_t n = 17; n < 32; n++) {
23837 for (size_t k = 1; k <= 5; k += 2) {
23838 GemmMicrokernelTester()
23839 .mr(1)
23840 .nr(16)
23841 .kr(1)
23842 .sr(1)
23843 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023844 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023845 .k(k)
23846 .ks(3)
23847 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23848 }
23849 }
23850 }
23851
23852 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
23853 TEST_REQUIRES_X86_AVX512F;
23854 for (uint32_t n = 32; n <= 48; n += 16) {
23855 for (size_t k = 1; k <= 5; k += 2) {
23856 GemmMicrokernelTester()
23857 .mr(1)
23858 .nr(16)
23859 .kr(1)
23860 .sr(1)
23861 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023862 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023863 .k(k)
23864 .ks(3)
23865 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23866 }
23867 }
23868 }
23869
23870 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, strided_cm_subtile) {
23871 TEST_REQUIRES_X86_AVX512F;
23872 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023873 for (uint32_t n = 1; n <= 16; n++) {
23874 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023875 GemmMicrokernelTester()
23876 .mr(1)
23877 .nr(16)
23878 .kr(1)
23879 .sr(1)
23880 .m(m)
23881 .n(n)
23882 .k(k)
23883 .cm_stride(19)
23884 .iterations(1)
23885 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23886 }
23887 }
23888 }
23889 }
23890
23891 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, a_offset) {
23892 TEST_REQUIRES_X86_AVX512F;
23893 for (size_t k = 1; k <= 5; k += 2) {
23894 GemmMicrokernelTester()
23895 .mr(1)
23896 .nr(16)
23897 .kr(1)
23898 .sr(1)
23899 .m(1)
23900 .n(16)
23901 .k(k)
23902 .ks(3)
23903 .a_offset(7)
23904 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23905 }
23906 }
23907
23908 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, zero) {
23909 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023910 for (size_t k = 1; k <= 5; k += 2) {
23911 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080023912 GemmMicrokernelTester()
23913 .mr(1)
23914 .nr(16)
23915 .kr(1)
23916 .sr(1)
23917 .m(1)
23918 .n(16)
23919 .k(k)
23920 .ks(3)
23921 .a_offset(7)
23922 .zero_index(mz)
23923 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23924 }
23925 }
23926 }
23927
23928 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, qmin) {
23929 TEST_REQUIRES_X86_AVX512F;
23930 GemmMicrokernelTester()
23931 .mr(1)
23932 .nr(16)
23933 .kr(1)
23934 .sr(1)
23935 .m(1)
23936 .n(16)
23937 .k(1)
23938 .qmin(128)
23939 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23940 }
23941
23942 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, qmax) {
23943 TEST_REQUIRES_X86_AVX512F;
23944 GemmMicrokernelTester()
23945 .mr(1)
23946 .nr(16)
23947 .kr(1)
23948 .sr(1)
23949 .m(1)
23950 .n(16)
23951 .k(1)
23952 .qmax(128)
23953 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23954 }
23955
23956 TEST(F32_IGEMM_MINMAX_1X16__AVX512F_BROADCAST, strided_cm) {
23957 TEST_REQUIRES_X86_AVX512F;
23958 GemmMicrokernelTester()
23959 .mr(1)
23960 .nr(16)
23961 .kr(1)
23962 .sr(1)
23963 .m(1)
23964 .n(16)
23965 .k(1)
23966 .cm_stride(19)
23967 .Test(xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23968 }
23969#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23970
23971
23972#if XNN_ARCH_X86 || XNN_ARCH_X86_64
23973 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1) {
23974 TEST_REQUIRES_X86_AVX512F;
23975 GemmMicrokernelTester()
23976 .mr(6)
23977 .nr(16)
23978 .kr(1)
23979 .sr(1)
23980 .m(6)
23981 .n(16)
23982 .k(1)
23983 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23984 }
23985
23986 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, strided_cn) {
23987 TEST_REQUIRES_X86_AVX512F;
23988 GemmMicrokernelTester()
23989 .mr(6)
23990 .nr(16)
23991 .kr(1)
23992 .sr(1)
23993 .m(6)
23994 .n(16)
23995 .k(1)
23996 .cn_stride(19)
23997 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
23998 }
23999
24000 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile) {
24001 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024002 for (uint32_t n = 1; n <= 16; n++) {
24003 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024004 GemmMicrokernelTester()
24005 .mr(6)
24006 .nr(16)
24007 .kr(1)
24008 .sr(1)
24009 .m(m)
24010 .n(n)
24011 .k(1)
24012 .iterations(1)
24013 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24014 }
24015 }
24016 }
24017
24018 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
24019 TEST_REQUIRES_X86_AVX512F;
24020 for (uint32_t m = 1; m <= 6; m++) {
24021 GemmMicrokernelTester()
24022 .mr(6)
24023 .nr(16)
24024 .kr(1)
24025 .sr(1)
24026 .m(m)
24027 .n(16)
24028 .k(1)
24029 .iterations(1)
24030 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24031 }
24032 }
24033
24034 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
24035 TEST_REQUIRES_X86_AVX512F;
24036 for (uint32_t n = 1; n <= 16; n++) {
24037 GemmMicrokernelTester()
24038 .mr(6)
24039 .nr(16)
24040 .kr(1)
24041 .sr(1)
24042 .m(6)
24043 .n(n)
24044 .k(1)
24045 .iterations(1)
24046 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24047 }
24048 }
24049
24050 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1) {
24051 TEST_REQUIRES_X86_AVX512F;
24052 for (size_t k = 2; k < 10; k++) {
24053 GemmMicrokernelTester()
24054 .mr(6)
24055 .nr(16)
24056 .kr(1)
24057 .sr(1)
24058 .m(6)
24059 .n(16)
24060 .k(k)
24061 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24062 }
24063 }
24064
24065 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, k_gt_1_subtile) {
24066 TEST_REQUIRES_X86_AVX512F;
24067 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024068 for (uint32_t n = 1; n <= 16; n++) {
24069 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024070 GemmMicrokernelTester()
24071 .mr(6)
24072 .nr(16)
24073 .kr(1)
24074 .sr(1)
24075 .m(m)
24076 .n(n)
24077 .k(k)
24078 .iterations(1)
24079 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24080 }
24081 }
24082 }
24083 }
24084
24085 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16) {
24086 TEST_REQUIRES_X86_AVX512F;
24087 for (uint32_t n = 17; n < 32; n++) {
24088 for (size_t k = 1; k <= 5; k += 2) {
24089 GemmMicrokernelTester()
24090 .mr(6)
24091 .nr(16)
24092 .kr(1)
24093 .sr(1)
24094 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024095 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024096 .k(k)
24097 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24098 }
24099 }
24100 }
24101
24102 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
24103 TEST_REQUIRES_X86_AVX512F;
24104 for (uint32_t n = 17; n < 32; n++) {
24105 for (size_t k = 1; k <= 5; k += 2) {
24106 GemmMicrokernelTester()
24107 .mr(6)
24108 .nr(16)
24109 .kr(1)
24110 .sr(1)
24111 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024112 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024113 .k(k)
24114 .cn_stride(19)
24115 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24116 }
24117 }
24118 }
24119
24120 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_subtile) {
24121 TEST_REQUIRES_X86_AVX512F;
24122 for (uint32_t n = 17; n < 32; n++) {
24123 for (size_t k = 1; k <= 5; k += 2) {
24124 for (uint32_t m = 1; m <= 6; m++) {
24125 GemmMicrokernelTester()
24126 .mr(6)
24127 .nr(16)
24128 .kr(1)
24129 .sr(1)
24130 .m(m)
24131 .n(n)
24132 .k(k)
24133 .iterations(1)
24134 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24135 }
24136 }
24137 }
24138 }
24139
24140 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16) {
24141 TEST_REQUIRES_X86_AVX512F;
24142 for (uint32_t n = 32; n <= 48; n += 16) {
24143 for (size_t k = 1; k <= 5; k += 2) {
24144 GemmMicrokernelTester()
24145 .mr(6)
24146 .nr(16)
24147 .kr(1)
24148 .sr(1)
24149 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024150 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024151 .k(k)
24152 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24153 }
24154 }
24155 }
24156
24157 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
24158 TEST_REQUIRES_X86_AVX512F;
24159 for (uint32_t n = 32; n <= 48; n += 16) {
24160 for (size_t k = 1; k <= 5; k += 2) {
24161 GemmMicrokernelTester()
24162 .mr(6)
24163 .nr(16)
24164 .kr(1)
24165 .sr(1)
24166 .m(6)
24167 .n(n)
24168 .k(k)
24169 .cn_stride(19)
24170 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24171 }
24172 }
24173 }
24174
24175 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_subtile) {
24176 TEST_REQUIRES_X86_AVX512F;
24177 for (uint32_t n = 32; n <= 48; n += 16) {
24178 for (size_t k = 1; k <= 5; k += 2) {
24179 for (uint32_t m = 1; m <= 6; m++) {
24180 GemmMicrokernelTester()
24181 .mr(6)
24182 .nr(16)
24183 .kr(1)
24184 .sr(1)
24185 .m(m)
24186 .n(n)
24187 .k(k)
24188 .iterations(1)
24189 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24190 }
24191 }
24192 }
24193 }
24194
24195 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, small_kernel) {
24196 TEST_REQUIRES_X86_AVX512F;
24197 for (size_t k = 1; k <= 5; k += 2) {
24198 GemmMicrokernelTester()
24199 .mr(6)
24200 .nr(16)
24201 .kr(1)
24202 .sr(1)
24203 .m(6)
24204 .n(16)
24205 .k(k)
24206 .ks(3)
24207 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24208 }
24209 }
24210
24211 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, small_kernel_subtile) {
24212 TEST_REQUIRES_X86_AVX512F;
24213 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024214 for (uint32_t n = 1; n <= 16; n++) {
24215 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024216 GemmMicrokernelTester()
24217 .mr(6)
24218 .nr(16)
24219 .kr(1)
24220 .sr(1)
24221 .m(m)
24222 .n(n)
24223 .k(k)
24224 .ks(3)
24225 .iterations(1)
24226 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24227 }
24228 }
24229 }
24230 }
24231
24232 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
24233 TEST_REQUIRES_X86_AVX512F;
24234 for (uint32_t n = 17; n < 32; n++) {
24235 for (size_t k = 1; k <= 5; k += 2) {
24236 GemmMicrokernelTester()
24237 .mr(6)
24238 .nr(16)
24239 .kr(1)
24240 .sr(1)
24241 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024242 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024243 .k(k)
24244 .ks(3)
24245 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24246 }
24247 }
24248 }
24249
24250 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
24251 TEST_REQUIRES_X86_AVX512F;
24252 for (uint32_t n = 32; n <= 48; n += 16) {
24253 for (size_t k = 1; k <= 5; k += 2) {
24254 GemmMicrokernelTester()
24255 .mr(6)
24256 .nr(16)
24257 .kr(1)
24258 .sr(1)
24259 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024260 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024261 .k(k)
24262 .ks(3)
24263 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24264 }
24265 }
24266 }
24267
24268 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, strided_cm_subtile) {
24269 TEST_REQUIRES_X86_AVX512F;
24270 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024271 for (uint32_t n = 1; n <= 16; n++) {
24272 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024273 GemmMicrokernelTester()
24274 .mr(6)
24275 .nr(16)
24276 .kr(1)
24277 .sr(1)
24278 .m(m)
24279 .n(n)
24280 .k(k)
24281 .cm_stride(19)
24282 .iterations(1)
24283 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24284 }
24285 }
24286 }
24287 }
24288
24289 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, a_offset) {
24290 TEST_REQUIRES_X86_AVX512F;
24291 for (size_t k = 1; k <= 5; k += 2) {
24292 GemmMicrokernelTester()
24293 .mr(6)
24294 .nr(16)
24295 .kr(1)
24296 .sr(1)
24297 .m(6)
24298 .n(16)
24299 .k(k)
24300 .ks(3)
24301 .a_offset(37)
24302 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24303 }
24304 }
24305
24306 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, zero) {
24307 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024308 for (size_t k = 1; k <= 5; k += 2) {
24309 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024310 GemmMicrokernelTester()
24311 .mr(6)
24312 .nr(16)
24313 .kr(1)
24314 .sr(1)
24315 .m(6)
24316 .n(16)
24317 .k(k)
24318 .ks(3)
24319 .a_offset(37)
24320 .zero_index(mz)
24321 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24322 }
24323 }
24324 }
24325
24326 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, qmin) {
24327 TEST_REQUIRES_X86_AVX512F;
24328 GemmMicrokernelTester()
24329 .mr(6)
24330 .nr(16)
24331 .kr(1)
24332 .sr(1)
24333 .m(6)
24334 .n(16)
24335 .k(1)
24336 .qmin(128)
24337 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24338 }
24339
24340 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, qmax) {
24341 TEST_REQUIRES_X86_AVX512F;
24342 GemmMicrokernelTester()
24343 .mr(6)
24344 .nr(16)
24345 .kr(1)
24346 .sr(1)
24347 .m(6)
24348 .n(16)
24349 .k(1)
24350 .qmax(128)
24351 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24352 }
24353
24354 TEST(F32_IGEMM_MINMAX_6X16__AVX512F_BROADCAST, strided_cm) {
24355 TEST_REQUIRES_X86_AVX512F;
24356 GemmMicrokernelTester()
24357 .mr(6)
24358 .nr(16)
24359 .kr(1)
24360 .sr(1)
24361 .m(6)
24362 .n(16)
24363 .k(1)
24364 .cm_stride(19)
24365 .Test(xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24366 }
24367#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24368
24369
24370#if XNN_ARCH_X86 || XNN_ARCH_X86_64
24371 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1) {
24372 TEST_REQUIRES_X86_AVX512F;
24373 GemmMicrokernelTester()
24374 .mr(7)
24375 .nr(16)
24376 .kr(1)
24377 .sr(1)
24378 .m(7)
24379 .n(16)
24380 .k(1)
24381 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24382 }
24383
24384 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, strided_cn) {
24385 TEST_REQUIRES_X86_AVX512F;
24386 GemmMicrokernelTester()
24387 .mr(7)
24388 .nr(16)
24389 .kr(1)
24390 .sr(1)
24391 .m(7)
24392 .n(16)
24393 .k(1)
24394 .cn_stride(19)
24395 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24396 }
24397
24398 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile) {
24399 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024400 for (uint32_t n = 1; n <= 16; n++) {
24401 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024402 GemmMicrokernelTester()
24403 .mr(7)
24404 .nr(16)
24405 .kr(1)
24406 .sr(1)
24407 .m(m)
24408 .n(n)
24409 .k(1)
24410 .iterations(1)
24411 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24412 }
24413 }
24414 }
24415
24416 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
24417 TEST_REQUIRES_X86_AVX512F;
24418 for (uint32_t m = 1; m <= 7; m++) {
24419 GemmMicrokernelTester()
24420 .mr(7)
24421 .nr(16)
24422 .kr(1)
24423 .sr(1)
24424 .m(m)
24425 .n(16)
24426 .k(1)
24427 .iterations(1)
24428 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24429 }
24430 }
24431
24432 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
24433 TEST_REQUIRES_X86_AVX512F;
24434 for (uint32_t n = 1; n <= 16; n++) {
24435 GemmMicrokernelTester()
24436 .mr(7)
24437 .nr(16)
24438 .kr(1)
24439 .sr(1)
24440 .m(7)
24441 .n(n)
24442 .k(1)
24443 .iterations(1)
24444 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24445 }
24446 }
24447
24448 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1) {
24449 TEST_REQUIRES_X86_AVX512F;
24450 for (size_t k = 2; k < 10; k++) {
24451 GemmMicrokernelTester()
24452 .mr(7)
24453 .nr(16)
24454 .kr(1)
24455 .sr(1)
24456 .m(7)
24457 .n(16)
24458 .k(k)
24459 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24460 }
24461 }
24462
24463 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, k_gt_1_subtile) {
24464 TEST_REQUIRES_X86_AVX512F;
24465 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024466 for (uint32_t n = 1; n <= 16; n++) {
24467 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024468 GemmMicrokernelTester()
24469 .mr(7)
24470 .nr(16)
24471 .kr(1)
24472 .sr(1)
24473 .m(m)
24474 .n(n)
24475 .k(k)
24476 .iterations(1)
24477 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24478 }
24479 }
24480 }
24481 }
24482
24483 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16) {
24484 TEST_REQUIRES_X86_AVX512F;
24485 for (uint32_t n = 17; n < 32; n++) {
24486 for (size_t k = 1; k <= 5; k += 2) {
24487 GemmMicrokernelTester()
24488 .mr(7)
24489 .nr(16)
24490 .kr(1)
24491 .sr(1)
24492 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024493 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024494 .k(k)
24495 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24496 }
24497 }
24498 }
24499
24500 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
24501 TEST_REQUIRES_X86_AVX512F;
24502 for (uint32_t n = 17; n < 32; n++) {
24503 for (size_t k = 1; k <= 5; k += 2) {
24504 GemmMicrokernelTester()
24505 .mr(7)
24506 .nr(16)
24507 .kr(1)
24508 .sr(1)
24509 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024510 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024511 .k(k)
24512 .cn_stride(19)
24513 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24514 }
24515 }
24516 }
24517
24518 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_subtile) {
24519 TEST_REQUIRES_X86_AVX512F;
24520 for (uint32_t n = 17; n < 32; n++) {
24521 for (size_t k = 1; k <= 5; k += 2) {
24522 for (uint32_t m = 1; m <= 7; m++) {
24523 GemmMicrokernelTester()
24524 .mr(7)
24525 .nr(16)
24526 .kr(1)
24527 .sr(1)
24528 .m(m)
24529 .n(n)
24530 .k(k)
24531 .iterations(1)
24532 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24533 }
24534 }
24535 }
24536 }
24537
24538 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16) {
24539 TEST_REQUIRES_X86_AVX512F;
24540 for (uint32_t n = 32; n <= 48; n += 16) {
24541 for (size_t k = 1; k <= 5; k += 2) {
24542 GemmMicrokernelTester()
24543 .mr(7)
24544 .nr(16)
24545 .kr(1)
24546 .sr(1)
24547 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024548 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024549 .k(k)
24550 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24551 }
24552 }
24553 }
24554
24555 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
24556 TEST_REQUIRES_X86_AVX512F;
24557 for (uint32_t n = 32; n <= 48; n += 16) {
24558 for (size_t k = 1; k <= 5; k += 2) {
24559 GemmMicrokernelTester()
24560 .mr(7)
24561 .nr(16)
24562 .kr(1)
24563 .sr(1)
24564 .m(7)
24565 .n(n)
24566 .k(k)
24567 .cn_stride(19)
24568 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24569 }
24570 }
24571 }
24572
24573 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_subtile) {
24574 TEST_REQUIRES_X86_AVX512F;
24575 for (uint32_t n = 32; n <= 48; n += 16) {
24576 for (size_t k = 1; k <= 5; k += 2) {
24577 for (uint32_t m = 1; m <= 7; m++) {
24578 GemmMicrokernelTester()
24579 .mr(7)
24580 .nr(16)
24581 .kr(1)
24582 .sr(1)
24583 .m(m)
24584 .n(n)
24585 .k(k)
24586 .iterations(1)
24587 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24588 }
24589 }
24590 }
24591 }
24592
24593 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, small_kernel) {
24594 TEST_REQUIRES_X86_AVX512F;
24595 for (size_t k = 1; k <= 5; k += 2) {
24596 GemmMicrokernelTester()
24597 .mr(7)
24598 .nr(16)
24599 .kr(1)
24600 .sr(1)
24601 .m(7)
24602 .n(16)
24603 .k(k)
24604 .ks(3)
24605 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24606 }
24607 }
24608
24609 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, small_kernel_subtile) {
24610 TEST_REQUIRES_X86_AVX512F;
24611 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024612 for (uint32_t n = 1; n <= 16; n++) {
24613 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024614 GemmMicrokernelTester()
24615 .mr(7)
24616 .nr(16)
24617 .kr(1)
24618 .sr(1)
24619 .m(m)
24620 .n(n)
24621 .k(k)
24622 .ks(3)
24623 .iterations(1)
24624 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24625 }
24626 }
24627 }
24628 }
24629
24630 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
24631 TEST_REQUIRES_X86_AVX512F;
24632 for (uint32_t n = 17; n < 32; n++) {
24633 for (size_t k = 1; k <= 5; k += 2) {
24634 GemmMicrokernelTester()
24635 .mr(7)
24636 .nr(16)
24637 .kr(1)
24638 .sr(1)
24639 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024640 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024641 .k(k)
24642 .ks(3)
24643 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24644 }
24645 }
24646 }
24647
24648 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
24649 TEST_REQUIRES_X86_AVX512F;
24650 for (uint32_t n = 32; n <= 48; n += 16) {
24651 for (size_t k = 1; k <= 5; k += 2) {
24652 GemmMicrokernelTester()
24653 .mr(7)
24654 .nr(16)
24655 .kr(1)
24656 .sr(1)
24657 .m(7)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024658 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024659 .k(k)
24660 .ks(3)
24661 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24662 }
24663 }
24664 }
24665
24666 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, strided_cm_subtile) {
24667 TEST_REQUIRES_X86_AVX512F;
24668 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024669 for (uint32_t n = 1; n <= 16; n++) {
24670 for (uint32_t m = 1; m <= 7; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024671 GemmMicrokernelTester()
24672 .mr(7)
24673 .nr(16)
24674 .kr(1)
24675 .sr(1)
24676 .m(m)
24677 .n(n)
24678 .k(k)
24679 .cm_stride(19)
24680 .iterations(1)
24681 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24682 }
24683 }
24684 }
24685 }
24686
24687 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, a_offset) {
24688 TEST_REQUIRES_X86_AVX512F;
24689 for (size_t k = 1; k <= 5; k += 2) {
24690 GemmMicrokernelTester()
24691 .mr(7)
24692 .nr(16)
24693 .kr(1)
24694 .sr(1)
24695 .m(7)
24696 .n(16)
24697 .k(k)
24698 .ks(3)
24699 .a_offset(37)
24700 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24701 }
24702 }
24703
24704 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, zero) {
24705 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024706 for (size_t k = 1; k <= 5; k += 2) {
24707 for (uint32_t mz = 0; mz < 7; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024708 GemmMicrokernelTester()
24709 .mr(7)
24710 .nr(16)
24711 .kr(1)
24712 .sr(1)
24713 .m(7)
24714 .n(16)
24715 .k(k)
24716 .ks(3)
24717 .a_offset(37)
24718 .zero_index(mz)
24719 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24720 }
24721 }
24722 }
24723
24724 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, qmin) {
24725 TEST_REQUIRES_X86_AVX512F;
24726 GemmMicrokernelTester()
24727 .mr(7)
24728 .nr(16)
24729 .kr(1)
24730 .sr(1)
24731 .m(7)
24732 .n(16)
24733 .k(1)
24734 .qmin(128)
24735 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24736 }
24737
24738 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, qmax) {
24739 TEST_REQUIRES_X86_AVX512F;
24740 GemmMicrokernelTester()
24741 .mr(7)
24742 .nr(16)
24743 .kr(1)
24744 .sr(1)
24745 .m(7)
24746 .n(16)
24747 .k(1)
24748 .qmax(128)
24749 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24750 }
24751
24752 TEST(F32_IGEMM_MINMAX_7X16__AVX512F_BROADCAST, strided_cm) {
24753 TEST_REQUIRES_X86_AVX512F;
24754 GemmMicrokernelTester()
24755 .mr(7)
24756 .nr(16)
24757 .kr(1)
24758 .sr(1)
24759 .m(7)
24760 .n(16)
24761 .k(1)
24762 .cm_stride(19)
24763 .Test(xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24764 }
24765#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24766
24767
24768#if XNN_ARCH_X86 || XNN_ARCH_X86_64
24769 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1) {
24770 TEST_REQUIRES_X86_AVX512F;
24771 GemmMicrokernelTester()
24772 .mr(8)
24773 .nr(16)
24774 .kr(1)
24775 .sr(1)
24776 .m(8)
24777 .n(16)
24778 .k(1)
24779 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24780 }
24781
24782 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, strided_cn) {
24783 TEST_REQUIRES_X86_AVX512F;
24784 GemmMicrokernelTester()
24785 .mr(8)
24786 .nr(16)
24787 .kr(1)
24788 .sr(1)
24789 .m(8)
24790 .n(16)
24791 .k(1)
24792 .cn_stride(19)
24793 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24794 }
24795
24796 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile) {
24797 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024798 for (uint32_t n = 1; n <= 16; n++) {
24799 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024800 GemmMicrokernelTester()
24801 .mr(8)
24802 .nr(16)
24803 .kr(1)
24804 .sr(1)
24805 .m(m)
24806 .n(n)
24807 .k(1)
24808 .iterations(1)
24809 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24810 }
24811 }
24812 }
24813
24814 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile_m) {
24815 TEST_REQUIRES_X86_AVX512F;
24816 for (uint32_t m = 1; m <= 8; m++) {
24817 GemmMicrokernelTester()
24818 .mr(8)
24819 .nr(16)
24820 .kr(1)
24821 .sr(1)
24822 .m(m)
24823 .n(16)
24824 .k(1)
24825 .iterations(1)
24826 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24827 }
24828 }
24829
24830 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_eq_1_subtile_n) {
24831 TEST_REQUIRES_X86_AVX512F;
24832 for (uint32_t n = 1; n <= 16; n++) {
24833 GemmMicrokernelTester()
24834 .mr(8)
24835 .nr(16)
24836 .kr(1)
24837 .sr(1)
24838 .m(8)
24839 .n(n)
24840 .k(1)
24841 .iterations(1)
24842 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24843 }
24844 }
24845
24846 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1) {
24847 TEST_REQUIRES_X86_AVX512F;
24848 for (size_t k = 2; k < 10; k++) {
24849 GemmMicrokernelTester()
24850 .mr(8)
24851 .nr(16)
24852 .kr(1)
24853 .sr(1)
24854 .m(8)
24855 .n(16)
24856 .k(k)
24857 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24858 }
24859 }
24860
24861 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, k_gt_1_subtile) {
24862 TEST_REQUIRES_X86_AVX512F;
24863 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024864 for (uint32_t n = 1; n <= 16; n++) {
24865 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024866 GemmMicrokernelTester()
24867 .mr(8)
24868 .nr(16)
24869 .kr(1)
24870 .sr(1)
24871 .m(m)
24872 .n(n)
24873 .k(k)
24874 .iterations(1)
24875 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24876 }
24877 }
24878 }
24879 }
24880
24881 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16) {
24882 TEST_REQUIRES_X86_AVX512F;
24883 for (uint32_t n = 17; n < 32; n++) {
24884 for (size_t k = 1; k <= 5; k += 2) {
24885 GemmMicrokernelTester()
24886 .mr(8)
24887 .nr(16)
24888 .kr(1)
24889 .sr(1)
24890 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024891 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024892 .k(k)
24893 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24894 }
24895 }
24896 }
24897
24898 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_strided_cn) {
24899 TEST_REQUIRES_X86_AVX512F;
24900 for (uint32_t n = 17; n < 32; n++) {
24901 for (size_t k = 1; k <= 5; k += 2) {
24902 GemmMicrokernelTester()
24903 .mr(8)
24904 .nr(16)
24905 .kr(1)
24906 .sr(1)
24907 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024908 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024909 .k(k)
24910 .cn_stride(19)
24911 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24912 }
24913 }
24914 }
24915
24916 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_subtile) {
24917 TEST_REQUIRES_X86_AVX512F;
24918 for (uint32_t n = 17; n < 32; n++) {
24919 for (size_t k = 1; k <= 5; k += 2) {
24920 for (uint32_t m = 1; m <= 8; m++) {
24921 GemmMicrokernelTester()
24922 .mr(8)
24923 .nr(16)
24924 .kr(1)
24925 .sr(1)
24926 .m(m)
24927 .n(n)
24928 .k(k)
24929 .iterations(1)
24930 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24931 }
24932 }
24933 }
24934 }
24935
24936 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16) {
24937 TEST_REQUIRES_X86_AVX512F;
24938 for (uint32_t n = 32; n <= 48; n += 16) {
24939 for (size_t k = 1; k <= 5; k += 2) {
24940 GemmMicrokernelTester()
24941 .mr(8)
24942 .nr(16)
24943 .kr(1)
24944 .sr(1)
24945 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024946 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080024947 .k(k)
24948 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24949 }
24950 }
24951 }
24952
24953 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_strided_cn) {
24954 TEST_REQUIRES_X86_AVX512F;
24955 for (uint32_t n = 32; n <= 48; n += 16) {
24956 for (size_t k = 1; k <= 5; k += 2) {
24957 GemmMicrokernelTester()
24958 .mr(8)
24959 .nr(16)
24960 .kr(1)
24961 .sr(1)
24962 .m(8)
24963 .n(n)
24964 .k(k)
24965 .cn_stride(19)
24966 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24967 }
24968 }
24969 }
24970
24971 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_subtile) {
24972 TEST_REQUIRES_X86_AVX512F;
24973 for (uint32_t n = 32; n <= 48; n += 16) {
24974 for (size_t k = 1; k <= 5; k += 2) {
24975 for (uint32_t m = 1; m <= 8; m++) {
24976 GemmMicrokernelTester()
24977 .mr(8)
24978 .nr(16)
24979 .kr(1)
24980 .sr(1)
24981 .m(m)
24982 .n(n)
24983 .k(k)
24984 .iterations(1)
24985 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
24986 }
24987 }
24988 }
24989 }
24990
24991 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, small_kernel) {
24992 TEST_REQUIRES_X86_AVX512F;
24993 for (size_t k = 1; k <= 5; k += 2) {
24994 GemmMicrokernelTester()
24995 .mr(8)
24996 .nr(16)
24997 .kr(1)
24998 .sr(1)
24999 .m(8)
25000 .n(16)
25001 .k(k)
25002 .ks(3)
25003 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
25004 }
25005 }
25006
25007 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, small_kernel_subtile) {
25008 TEST_REQUIRES_X86_AVX512F;
25009 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025010 for (uint32_t n = 1; n <= 16; n++) {
25011 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025012 GemmMicrokernelTester()
25013 .mr(8)
25014 .nr(16)
25015 .kr(1)
25016 .sr(1)
25017 .m(m)
25018 .n(n)
25019 .k(k)
25020 .ks(3)
25021 .iterations(1)
25022 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
25023 }
25024 }
25025 }
25026 }
25027
25028 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_gt_16_small_kernel) {
25029 TEST_REQUIRES_X86_AVX512F;
25030 for (uint32_t n = 17; n < 32; n++) {
25031 for (size_t k = 1; k <= 5; k += 2) {
25032 GemmMicrokernelTester()
25033 .mr(8)
25034 .nr(16)
25035 .kr(1)
25036 .sr(1)
25037 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025038 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025039 .k(k)
25040 .ks(3)
25041 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
25042 }
25043 }
25044 }
25045
25046 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, n_div_16_small_kernel) {
25047 TEST_REQUIRES_X86_AVX512F;
25048 for (uint32_t n = 32; n <= 48; n += 16) {
25049 for (size_t k = 1; k <= 5; k += 2) {
25050 GemmMicrokernelTester()
25051 .mr(8)
25052 .nr(16)
25053 .kr(1)
25054 .sr(1)
25055 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025056 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025057 .k(k)
25058 .ks(3)
25059 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
25060 }
25061 }
25062 }
25063
25064 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, strided_cm_subtile) {
25065 TEST_REQUIRES_X86_AVX512F;
25066 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025067 for (uint32_t n = 1; n <= 16; n++) {
25068 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025069 GemmMicrokernelTester()
25070 .mr(8)
25071 .nr(16)
25072 .kr(1)
25073 .sr(1)
25074 .m(m)
25075 .n(n)
25076 .k(k)
25077 .cm_stride(19)
25078 .iterations(1)
25079 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
25080 }
25081 }
25082 }
25083 }
25084
25085 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, a_offset) {
25086 TEST_REQUIRES_X86_AVX512F;
25087 for (size_t k = 1; k <= 5; k += 2) {
25088 GemmMicrokernelTester()
25089 .mr(8)
25090 .nr(16)
25091 .kr(1)
25092 .sr(1)
25093 .m(8)
25094 .n(16)
25095 .k(k)
25096 .ks(3)
25097 .a_offset(43)
25098 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
25099 }
25100 }
25101
25102 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, zero) {
25103 TEST_REQUIRES_X86_AVX512F;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025104 for (size_t k = 1; k <= 5; k += 2) {
25105 for (uint32_t mz = 0; mz < 8; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025106 GemmMicrokernelTester()
25107 .mr(8)
25108 .nr(16)
25109 .kr(1)
25110 .sr(1)
25111 .m(8)
25112 .n(16)
25113 .k(k)
25114 .ks(3)
25115 .a_offset(43)
25116 .zero_index(mz)
25117 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
25118 }
25119 }
25120 }
25121
25122 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, qmin) {
25123 TEST_REQUIRES_X86_AVX512F;
25124 GemmMicrokernelTester()
25125 .mr(8)
25126 .nr(16)
25127 .kr(1)
25128 .sr(1)
25129 .m(8)
25130 .n(16)
25131 .k(1)
25132 .qmin(128)
25133 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
25134 }
25135
25136 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, qmax) {
25137 TEST_REQUIRES_X86_AVX512F;
25138 GemmMicrokernelTester()
25139 .mr(8)
25140 .nr(16)
25141 .kr(1)
25142 .sr(1)
25143 .m(8)
25144 .n(16)
25145 .k(1)
25146 .qmax(128)
25147 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
25148 }
25149
25150 TEST(F32_IGEMM_MINMAX_8X16__AVX512F_BROADCAST, strided_cm) {
25151 TEST_REQUIRES_X86_AVX512F;
25152 GemmMicrokernelTester()
25153 .mr(8)
25154 .nr(16)
25155 .kr(1)
25156 .sr(1)
25157 .m(8)
25158 .n(16)
25159 .k(1)
25160 .cm_stride(19)
25161 .Test(xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, xnn_init_f32_minmax_scalar_params);
25162 }
25163#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25164
25165
25166#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25167 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1) {
25168 GemmMicrokernelTester()
25169 .mr(5)
25170 .nr(8)
25171 .kr(1)
25172 .sr(1)
25173 .m(5)
25174 .n(8)
25175 .k(1)
25176 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25177 }
25178
25179 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cn) {
25180 GemmMicrokernelTester()
25181 .mr(5)
25182 .nr(8)
25183 .kr(1)
25184 .sr(1)
25185 .m(5)
25186 .n(8)
25187 .k(1)
25188 .cn_stride(11)
25189 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25190 }
25191
25192 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025193 for (uint32_t n = 1; n <= 8; n++) {
25194 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025195 GemmMicrokernelTester()
25196 .mr(5)
25197 .nr(8)
25198 .kr(1)
25199 .sr(1)
25200 .m(m)
25201 .n(n)
25202 .k(1)
25203 .iterations(1)
25204 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25205 }
25206 }
25207 }
25208
25209 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_m) {
25210 for (uint32_t m = 1; m <= 5; m++) {
25211 GemmMicrokernelTester()
25212 .mr(5)
25213 .nr(8)
25214 .kr(1)
25215 .sr(1)
25216 .m(m)
25217 .n(8)
25218 .k(1)
25219 .iterations(1)
25220 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25221 }
25222 }
25223
25224 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_eq_1_subtile_n) {
25225 for (uint32_t n = 1; n <= 8; n++) {
25226 GemmMicrokernelTester()
25227 .mr(5)
25228 .nr(8)
25229 .kr(1)
25230 .sr(1)
25231 .m(5)
25232 .n(n)
25233 .k(1)
25234 .iterations(1)
25235 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25236 }
25237 }
25238
25239 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1) {
25240 for (size_t k = 2; k < 10; k++) {
25241 GemmMicrokernelTester()
25242 .mr(5)
25243 .nr(8)
25244 .kr(1)
25245 .sr(1)
25246 .m(5)
25247 .n(8)
25248 .k(k)
25249 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25250 }
25251 }
25252
25253 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, k_gt_1_subtile) {
25254 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025255 for (uint32_t n = 1; n <= 8; n++) {
25256 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025257 GemmMicrokernelTester()
25258 .mr(5)
25259 .nr(8)
25260 .kr(1)
25261 .sr(1)
25262 .m(m)
25263 .n(n)
25264 .k(k)
25265 .iterations(1)
25266 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25267 }
25268 }
25269 }
25270 }
25271
25272 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8) {
25273 for (uint32_t n = 9; n < 16; n++) {
25274 for (size_t k = 1; k <= 5; k += 2) {
25275 GemmMicrokernelTester()
25276 .mr(5)
25277 .nr(8)
25278 .kr(1)
25279 .sr(1)
25280 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025281 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025282 .k(k)
25283 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25284 }
25285 }
25286 }
25287
25288 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_strided_cn) {
25289 for (uint32_t n = 9; n < 16; n++) {
25290 for (size_t k = 1; k <= 5; k += 2) {
25291 GemmMicrokernelTester()
25292 .mr(5)
25293 .nr(8)
25294 .kr(1)
25295 .sr(1)
25296 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025297 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025298 .k(k)
25299 .cn_stride(11)
25300 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25301 }
25302 }
25303 }
25304
25305 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_subtile) {
25306 for (uint32_t n = 9; n < 16; n++) {
25307 for (size_t k = 1; k <= 5; k += 2) {
25308 for (uint32_t m = 1; m <= 5; m++) {
25309 GemmMicrokernelTester()
25310 .mr(5)
25311 .nr(8)
25312 .kr(1)
25313 .sr(1)
25314 .m(m)
25315 .n(n)
25316 .k(k)
25317 .iterations(1)
25318 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25319 }
25320 }
25321 }
25322 }
25323
25324 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8) {
25325 for (uint32_t n = 16; n <= 24; n += 8) {
25326 for (size_t k = 1; k <= 5; k += 2) {
25327 GemmMicrokernelTester()
25328 .mr(5)
25329 .nr(8)
25330 .kr(1)
25331 .sr(1)
25332 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025333 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025334 .k(k)
25335 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25336 }
25337 }
25338 }
25339
25340 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_strided_cn) {
25341 for (uint32_t n = 16; n <= 24; n += 8) {
25342 for (size_t k = 1; k <= 5; k += 2) {
25343 GemmMicrokernelTester()
25344 .mr(5)
25345 .nr(8)
25346 .kr(1)
25347 .sr(1)
25348 .m(5)
25349 .n(n)
25350 .k(k)
25351 .cn_stride(11)
25352 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25353 }
25354 }
25355 }
25356
25357 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_subtile) {
25358 for (uint32_t n = 16; n <= 24; n += 8) {
25359 for (size_t k = 1; k <= 5; k += 2) {
25360 for (uint32_t m = 1; m <= 5; m++) {
25361 GemmMicrokernelTester()
25362 .mr(5)
25363 .nr(8)
25364 .kr(1)
25365 .sr(1)
25366 .m(m)
25367 .n(n)
25368 .k(k)
25369 .iterations(1)
25370 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25371 }
25372 }
25373 }
25374 }
25375
25376 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, small_kernel) {
25377 for (size_t k = 1; k <= 5; k += 2) {
25378 GemmMicrokernelTester()
25379 .mr(5)
25380 .nr(8)
25381 .kr(1)
25382 .sr(1)
25383 .m(5)
25384 .n(8)
25385 .k(k)
25386 .ks(3)
25387 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25388 }
25389 }
25390
25391 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, small_kernel_subtile) {
25392 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025393 for (uint32_t n = 1; n <= 8; n++) {
25394 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025395 GemmMicrokernelTester()
25396 .mr(5)
25397 .nr(8)
25398 .kr(1)
25399 .sr(1)
25400 .m(m)
25401 .n(n)
25402 .k(k)
25403 .ks(3)
25404 .iterations(1)
25405 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25406 }
25407 }
25408 }
25409 }
25410
25411 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_gt_8_small_kernel) {
25412 for (uint32_t n = 9; n < 16; n++) {
25413 for (size_t k = 1; k <= 5; k += 2) {
25414 GemmMicrokernelTester()
25415 .mr(5)
25416 .nr(8)
25417 .kr(1)
25418 .sr(1)
25419 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025420 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025421 .k(k)
25422 .ks(3)
25423 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25424 }
25425 }
25426 }
25427
25428 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, n_div_8_small_kernel) {
25429 for (uint32_t n = 16; n <= 24; n += 8) {
25430 for (size_t k = 1; k <= 5; k += 2) {
25431 GemmMicrokernelTester()
25432 .mr(5)
25433 .nr(8)
25434 .kr(1)
25435 .sr(1)
25436 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025437 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025438 .k(k)
25439 .ks(3)
25440 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25441 }
25442 }
25443 }
25444
25445 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cm_subtile) {
25446 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025447 for (uint32_t n = 1; n <= 8; n++) {
25448 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025449 GemmMicrokernelTester()
25450 .mr(5)
25451 .nr(8)
25452 .kr(1)
25453 .sr(1)
25454 .m(m)
25455 .n(n)
25456 .k(k)
25457 .cm_stride(11)
25458 .iterations(1)
25459 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25460 }
25461 }
25462 }
25463 }
25464
25465 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, a_offset) {
25466 for (size_t k = 1; k <= 5; k += 2) {
25467 GemmMicrokernelTester()
25468 .mr(5)
25469 .nr(8)
25470 .kr(1)
25471 .sr(1)
25472 .m(5)
25473 .n(8)
25474 .k(k)
25475 .ks(3)
25476 .a_offset(29)
25477 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25478 }
25479 }
25480
25481 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025482 for (size_t k = 1; k <= 5; k += 2) {
25483 for (uint32_t mz = 0; mz < 5; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025484 GemmMicrokernelTester()
25485 .mr(5)
25486 .nr(8)
25487 .kr(1)
25488 .sr(1)
25489 .m(5)
25490 .n(8)
25491 .k(k)
25492 .ks(3)
25493 .a_offset(29)
25494 .zero_index(mz)
25495 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25496 }
25497 }
25498 }
25499
25500 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, qmin) {
25501 GemmMicrokernelTester()
25502 .mr(5)
25503 .nr(8)
25504 .kr(1)
25505 .sr(1)
25506 .m(5)
25507 .n(8)
25508 .k(1)
25509 .qmin(128)
25510 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25511 }
25512
25513 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, qmax) {
25514 GemmMicrokernelTester()
25515 .mr(5)
25516 .nr(8)
25517 .kr(1)
25518 .sr(1)
25519 .m(5)
25520 .n(8)
25521 .k(1)
25522 .qmax(128)
25523 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25524 }
25525
25526 TEST(F32_IGEMM_MINMAX_5X8__WASMSIMD_ARM_LOADSPLAT, strided_cm) {
25527 GemmMicrokernelTester()
25528 .mr(5)
25529 .nr(8)
25530 .kr(1)
25531 .sr(1)
25532 .m(5)
25533 .n(8)
25534 .k(1)
25535 .cm_stride(11)
25536 .Test(xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25537 }
25538#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25539
25540
25541#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25542 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
25543 GemmMicrokernelTester()
25544 .mr(1)
25545 .nr(8)
25546 .kr(1)
25547 .sr(1)
25548 .m(1)
25549 .n(8)
25550 .k(1)
25551 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25552 }
25553
25554 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
25555 GemmMicrokernelTester()
25556 .mr(1)
25557 .nr(8)
25558 .kr(1)
25559 .sr(1)
25560 .m(1)
25561 .n(8)
25562 .k(1)
25563 .cn_stride(11)
25564 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25565 }
25566
25567 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025568 for (uint32_t n = 1; n <= 8; n++) {
25569 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025570 GemmMicrokernelTester()
25571 .mr(1)
25572 .nr(8)
25573 .kr(1)
25574 .sr(1)
25575 .m(m)
25576 .n(n)
25577 .k(1)
25578 .iterations(1)
25579 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25580 }
25581 }
25582 }
25583
25584 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
25585 for (uint32_t m = 1; m <= 1; m++) {
25586 GemmMicrokernelTester()
25587 .mr(1)
25588 .nr(8)
25589 .kr(1)
25590 .sr(1)
25591 .m(m)
25592 .n(8)
25593 .k(1)
25594 .iterations(1)
25595 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25596 }
25597 }
25598
25599 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
25600 for (uint32_t n = 1; n <= 8; n++) {
25601 GemmMicrokernelTester()
25602 .mr(1)
25603 .nr(8)
25604 .kr(1)
25605 .sr(1)
25606 .m(1)
25607 .n(n)
25608 .k(1)
25609 .iterations(1)
25610 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25611 }
25612 }
25613
25614 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
25615 for (size_t k = 2; k < 10; k++) {
25616 GemmMicrokernelTester()
25617 .mr(1)
25618 .nr(8)
25619 .kr(1)
25620 .sr(1)
25621 .m(1)
25622 .n(8)
25623 .k(k)
25624 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25625 }
25626 }
25627
25628 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
25629 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025630 for (uint32_t n = 1; n <= 8; n++) {
25631 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025632 GemmMicrokernelTester()
25633 .mr(1)
25634 .nr(8)
25635 .kr(1)
25636 .sr(1)
25637 .m(m)
25638 .n(n)
25639 .k(k)
25640 .iterations(1)
25641 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25642 }
25643 }
25644 }
25645 }
25646
25647 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
25648 for (uint32_t n = 9; n < 16; n++) {
25649 for (size_t k = 1; k <= 5; k += 2) {
25650 GemmMicrokernelTester()
25651 .mr(1)
25652 .nr(8)
25653 .kr(1)
25654 .sr(1)
25655 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025656 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025657 .k(k)
25658 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25659 }
25660 }
25661 }
25662
25663 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
25664 for (uint32_t n = 9; n < 16; n++) {
25665 for (size_t k = 1; k <= 5; k += 2) {
25666 GemmMicrokernelTester()
25667 .mr(1)
25668 .nr(8)
25669 .kr(1)
25670 .sr(1)
25671 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025672 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025673 .k(k)
25674 .cn_stride(11)
25675 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25676 }
25677 }
25678 }
25679
25680 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
25681 for (uint32_t n = 9; n < 16; n++) {
25682 for (size_t k = 1; k <= 5; k += 2) {
25683 for (uint32_t m = 1; m <= 1; m++) {
25684 GemmMicrokernelTester()
25685 .mr(1)
25686 .nr(8)
25687 .kr(1)
25688 .sr(1)
25689 .m(m)
25690 .n(n)
25691 .k(k)
25692 .iterations(1)
25693 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25694 }
25695 }
25696 }
25697 }
25698
25699 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
25700 for (uint32_t n = 16; n <= 24; n += 8) {
25701 for (size_t k = 1; k <= 5; k += 2) {
25702 GemmMicrokernelTester()
25703 .mr(1)
25704 .nr(8)
25705 .kr(1)
25706 .sr(1)
25707 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025708 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025709 .k(k)
25710 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25711 }
25712 }
25713 }
25714
25715 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
25716 for (uint32_t n = 16; n <= 24; n += 8) {
25717 for (size_t k = 1; k <= 5; k += 2) {
25718 GemmMicrokernelTester()
25719 .mr(1)
25720 .nr(8)
25721 .kr(1)
25722 .sr(1)
25723 .m(1)
25724 .n(n)
25725 .k(k)
25726 .cn_stride(11)
25727 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25728 }
25729 }
25730 }
25731
25732 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
25733 for (uint32_t n = 16; n <= 24; n += 8) {
25734 for (size_t k = 1; k <= 5; k += 2) {
25735 for (uint32_t m = 1; m <= 1; m++) {
25736 GemmMicrokernelTester()
25737 .mr(1)
25738 .nr(8)
25739 .kr(1)
25740 .sr(1)
25741 .m(m)
25742 .n(n)
25743 .k(k)
25744 .iterations(1)
25745 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25746 }
25747 }
25748 }
25749 }
25750
25751 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, small_kernel) {
25752 for (size_t k = 1; k <= 5; k += 2) {
25753 GemmMicrokernelTester()
25754 .mr(1)
25755 .nr(8)
25756 .kr(1)
25757 .sr(1)
25758 .m(1)
25759 .n(8)
25760 .k(k)
25761 .ks(3)
25762 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25763 }
25764 }
25765
25766 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, small_kernel_subtile) {
25767 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025768 for (uint32_t n = 1; n <= 8; n++) {
25769 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025770 GemmMicrokernelTester()
25771 .mr(1)
25772 .nr(8)
25773 .kr(1)
25774 .sr(1)
25775 .m(m)
25776 .n(n)
25777 .k(k)
25778 .ks(3)
25779 .iterations(1)
25780 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25781 }
25782 }
25783 }
25784 }
25785
25786 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_small_kernel) {
25787 for (uint32_t n = 9; n < 16; n++) {
25788 for (size_t k = 1; k <= 5; k += 2) {
25789 GemmMicrokernelTester()
25790 .mr(1)
25791 .nr(8)
25792 .kr(1)
25793 .sr(1)
25794 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025795 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025796 .k(k)
25797 .ks(3)
25798 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25799 }
25800 }
25801 }
25802
25803 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, n_div_8_small_kernel) {
25804 for (uint32_t n = 16; n <= 24; n += 8) {
25805 for (size_t k = 1; k <= 5; k += 2) {
25806 GemmMicrokernelTester()
25807 .mr(1)
25808 .nr(8)
25809 .kr(1)
25810 .sr(1)
25811 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025812 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025813 .k(k)
25814 .ks(3)
25815 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25816 }
25817 }
25818 }
25819
25820 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
25821 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025822 for (uint32_t n = 1; n <= 8; n++) {
25823 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025824 GemmMicrokernelTester()
25825 .mr(1)
25826 .nr(8)
25827 .kr(1)
25828 .sr(1)
25829 .m(m)
25830 .n(n)
25831 .k(k)
25832 .cm_stride(11)
25833 .iterations(1)
25834 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25835 }
25836 }
25837 }
25838 }
25839
25840 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, a_offset) {
25841 for (size_t k = 1; k <= 5; k += 2) {
25842 GemmMicrokernelTester()
25843 .mr(1)
25844 .nr(8)
25845 .kr(1)
25846 .sr(1)
25847 .m(1)
25848 .n(8)
25849 .k(k)
25850 .ks(3)
25851 .a_offset(7)
25852 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25853 }
25854 }
25855
25856 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025857 for (size_t k = 1; k <= 5; k += 2) {
25858 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025859 GemmMicrokernelTester()
25860 .mr(1)
25861 .nr(8)
25862 .kr(1)
25863 .sr(1)
25864 .m(1)
25865 .n(8)
25866 .k(k)
25867 .ks(3)
25868 .a_offset(7)
25869 .zero_index(mz)
25870 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25871 }
25872 }
25873 }
25874
25875 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, qmin) {
25876 GemmMicrokernelTester()
25877 .mr(1)
25878 .nr(8)
25879 .kr(1)
25880 .sr(1)
25881 .m(1)
25882 .n(8)
25883 .k(1)
25884 .qmin(128)
25885 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25886 }
25887
25888 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, qmax) {
25889 GemmMicrokernelTester()
25890 .mr(1)
25891 .nr(8)
25892 .kr(1)
25893 .sr(1)
25894 .m(1)
25895 .n(8)
25896 .k(1)
25897 .qmax(128)
25898 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25899 }
25900
25901 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
25902 GemmMicrokernelTester()
25903 .mr(1)
25904 .nr(8)
25905 .kr(1)
25906 .sr(1)
25907 .m(1)
25908 .n(8)
25909 .k(1)
25910 .cm_stride(11)
25911 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25912 }
25913#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25914
25915
25916#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25917 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
25918 GemmMicrokernelTester()
25919 .mr(3)
25920 .nr(8)
25921 .kr(1)
25922 .sr(1)
25923 .m(3)
25924 .n(8)
25925 .k(1)
25926 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25927 }
25928
25929 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
25930 GemmMicrokernelTester()
25931 .mr(3)
25932 .nr(8)
25933 .kr(1)
25934 .sr(1)
25935 .m(3)
25936 .n(8)
25937 .k(1)
25938 .cn_stride(11)
25939 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25940 }
25941
25942 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025943 for (uint32_t n = 1; n <= 8; n++) {
25944 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080025945 GemmMicrokernelTester()
25946 .mr(3)
25947 .nr(8)
25948 .kr(1)
25949 .sr(1)
25950 .m(m)
25951 .n(n)
25952 .k(1)
25953 .iterations(1)
25954 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25955 }
25956 }
25957 }
25958
25959 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
25960 for (uint32_t m = 1; m <= 3; m++) {
25961 GemmMicrokernelTester()
25962 .mr(3)
25963 .nr(8)
25964 .kr(1)
25965 .sr(1)
25966 .m(m)
25967 .n(8)
25968 .k(1)
25969 .iterations(1)
25970 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25971 }
25972 }
25973
25974 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
25975 for (uint32_t n = 1; n <= 8; n++) {
25976 GemmMicrokernelTester()
25977 .mr(3)
25978 .nr(8)
25979 .kr(1)
25980 .sr(1)
25981 .m(3)
25982 .n(n)
25983 .k(1)
25984 .iterations(1)
25985 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
25986 }
25987 }
25988
25989 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
25990 for (size_t k = 2; k < 10; k++) {
25991 GemmMicrokernelTester()
25992 .mr(3)
25993 .nr(8)
25994 .kr(1)
25995 .sr(1)
25996 .m(3)
25997 .n(8)
25998 .k(k)
25999 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26000 }
26001 }
26002
26003 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
26004 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026005 for (uint32_t n = 1; n <= 8; n++) {
26006 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026007 GemmMicrokernelTester()
26008 .mr(3)
26009 .nr(8)
26010 .kr(1)
26011 .sr(1)
26012 .m(m)
26013 .n(n)
26014 .k(k)
26015 .iterations(1)
26016 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26017 }
26018 }
26019 }
26020 }
26021
26022 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
26023 for (uint32_t n = 9; n < 16; n++) {
26024 for (size_t k = 1; k <= 5; k += 2) {
26025 GemmMicrokernelTester()
26026 .mr(3)
26027 .nr(8)
26028 .kr(1)
26029 .sr(1)
26030 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026031 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026032 .k(k)
26033 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26034 }
26035 }
26036 }
26037
26038 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
26039 for (uint32_t n = 9; n < 16; n++) {
26040 for (size_t k = 1; k <= 5; k += 2) {
26041 GemmMicrokernelTester()
26042 .mr(3)
26043 .nr(8)
26044 .kr(1)
26045 .sr(1)
26046 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026047 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026048 .k(k)
26049 .cn_stride(11)
26050 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26051 }
26052 }
26053 }
26054
26055 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
26056 for (uint32_t n = 9; n < 16; n++) {
26057 for (size_t k = 1; k <= 5; k += 2) {
26058 for (uint32_t m = 1; m <= 3; m++) {
26059 GemmMicrokernelTester()
26060 .mr(3)
26061 .nr(8)
26062 .kr(1)
26063 .sr(1)
26064 .m(m)
26065 .n(n)
26066 .k(k)
26067 .iterations(1)
26068 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26069 }
26070 }
26071 }
26072 }
26073
26074 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
26075 for (uint32_t n = 16; n <= 24; n += 8) {
26076 for (size_t k = 1; k <= 5; k += 2) {
26077 GemmMicrokernelTester()
26078 .mr(3)
26079 .nr(8)
26080 .kr(1)
26081 .sr(1)
26082 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026083 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026084 .k(k)
26085 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26086 }
26087 }
26088 }
26089
26090 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
26091 for (uint32_t n = 16; n <= 24; n += 8) {
26092 for (size_t k = 1; k <= 5; k += 2) {
26093 GemmMicrokernelTester()
26094 .mr(3)
26095 .nr(8)
26096 .kr(1)
26097 .sr(1)
26098 .m(3)
26099 .n(n)
26100 .k(k)
26101 .cn_stride(11)
26102 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26103 }
26104 }
26105 }
26106
26107 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
26108 for (uint32_t n = 16; n <= 24; n += 8) {
26109 for (size_t k = 1; k <= 5; k += 2) {
26110 for (uint32_t m = 1; m <= 3; m++) {
26111 GemmMicrokernelTester()
26112 .mr(3)
26113 .nr(8)
26114 .kr(1)
26115 .sr(1)
26116 .m(m)
26117 .n(n)
26118 .k(k)
26119 .iterations(1)
26120 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26121 }
26122 }
26123 }
26124 }
26125
26126 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, small_kernel) {
26127 for (size_t k = 1; k <= 5; k += 2) {
26128 GemmMicrokernelTester()
26129 .mr(3)
26130 .nr(8)
26131 .kr(1)
26132 .sr(1)
26133 .m(3)
26134 .n(8)
26135 .k(k)
26136 .ks(3)
26137 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26138 }
26139 }
26140
26141 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, small_kernel_subtile) {
26142 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026143 for (uint32_t n = 1; n <= 8; n++) {
26144 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026145 GemmMicrokernelTester()
26146 .mr(3)
26147 .nr(8)
26148 .kr(1)
26149 .sr(1)
26150 .m(m)
26151 .n(n)
26152 .k(k)
26153 .ks(3)
26154 .iterations(1)
26155 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26156 }
26157 }
26158 }
26159 }
26160
26161 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_small_kernel) {
26162 for (uint32_t n = 9; n < 16; n++) {
26163 for (size_t k = 1; k <= 5; k += 2) {
26164 GemmMicrokernelTester()
26165 .mr(3)
26166 .nr(8)
26167 .kr(1)
26168 .sr(1)
26169 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026170 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026171 .k(k)
26172 .ks(3)
26173 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26174 }
26175 }
26176 }
26177
26178 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, n_div_8_small_kernel) {
26179 for (uint32_t n = 16; n <= 24; n += 8) {
26180 for (size_t k = 1; k <= 5; k += 2) {
26181 GemmMicrokernelTester()
26182 .mr(3)
26183 .nr(8)
26184 .kr(1)
26185 .sr(1)
26186 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026187 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026188 .k(k)
26189 .ks(3)
26190 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26191 }
26192 }
26193 }
26194
26195 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
26196 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026197 for (uint32_t n = 1; n <= 8; n++) {
26198 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026199 GemmMicrokernelTester()
26200 .mr(3)
26201 .nr(8)
26202 .kr(1)
26203 .sr(1)
26204 .m(m)
26205 .n(n)
26206 .k(k)
26207 .cm_stride(11)
26208 .iterations(1)
26209 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26210 }
26211 }
26212 }
26213 }
26214
26215 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, a_offset) {
26216 for (size_t k = 1; k <= 5; k += 2) {
26217 GemmMicrokernelTester()
26218 .mr(3)
26219 .nr(8)
26220 .kr(1)
26221 .sr(1)
26222 .m(3)
26223 .n(8)
26224 .k(k)
26225 .ks(3)
26226 .a_offset(17)
26227 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26228 }
26229 }
26230
26231 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026232 for (size_t k = 1; k <= 5; k += 2) {
26233 for (uint32_t mz = 0; mz < 3; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026234 GemmMicrokernelTester()
26235 .mr(3)
26236 .nr(8)
26237 .kr(1)
26238 .sr(1)
26239 .m(3)
26240 .n(8)
26241 .k(k)
26242 .ks(3)
26243 .a_offset(17)
26244 .zero_index(mz)
26245 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26246 }
26247 }
26248 }
26249
26250 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, qmin) {
26251 GemmMicrokernelTester()
26252 .mr(3)
26253 .nr(8)
26254 .kr(1)
26255 .sr(1)
26256 .m(3)
26257 .n(8)
26258 .k(1)
26259 .qmin(128)
26260 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26261 }
26262
26263 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, qmax) {
26264 GemmMicrokernelTester()
26265 .mr(3)
26266 .nr(8)
26267 .kr(1)
26268 .sr(1)
26269 .m(3)
26270 .n(8)
26271 .k(1)
26272 .qmax(128)
26273 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26274 }
26275
26276 TEST(F32_IGEMM_MINMAX_3X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
26277 GemmMicrokernelTester()
26278 .mr(3)
26279 .nr(8)
26280 .kr(1)
26281 .sr(1)
26282 .m(3)
26283 .n(8)
26284 .k(1)
26285 .cm_stride(11)
26286 .Test(xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26287 }
26288#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26289
26290
26291#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26292 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
26293 GemmMicrokernelTester()
26294 .mr(4)
26295 .nr(8)
26296 .kr(1)
26297 .sr(1)
26298 .m(4)
26299 .n(8)
26300 .k(1)
26301 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26302 }
26303
26304 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
26305 GemmMicrokernelTester()
26306 .mr(4)
26307 .nr(8)
26308 .kr(1)
26309 .sr(1)
26310 .m(4)
26311 .n(8)
26312 .k(1)
26313 .cn_stride(11)
26314 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26315 }
26316
26317 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026318 for (uint32_t n = 1; n <= 8; n++) {
26319 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026320 GemmMicrokernelTester()
26321 .mr(4)
26322 .nr(8)
26323 .kr(1)
26324 .sr(1)
26325 .m(m)
26326 .n(n)
26327 .k(1)
26328 .iterations(1)
26329 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26330 }
26331 }
26332 }
26333
26334 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
26335 for (uint32_t m = 1; m <= 4; m++) {
26336 GemmMicrokernelTester()
26337 .mr(4)
26338 .nr(8)
26339 .kr(1)
26340 .sr(1)
26341 .m(m)
26342 .n(8)
26343 .k(1)
26344 .iterations(1)
26345 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26346 }
26347 }
26348
26349 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
26350 for (uint32_t n = 1; n <= 8; n++) {
26351 GemmMicrokernelTester()
26352 .mr(4)
26353 .nr(8)
26354 .kr(1)
26355 .sr(1)
26356 .m(4)
26357 .n(n)
26358 .k(1)
26359 .iterations(1)
26360 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26361 }
26362 }
26363
26364 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
26365 for (size_t k = 2; k < 10; k++) {
26366 GemmMicrokernelTester()
26367 .mr(4)
26368 .nr(8)
26369 .kr(1)
26370 .sr(1)
26371 .m(4)
26372 .n(8)
26373 .k(k)
26374 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26375 }
26376 }
26377
26378 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
26379 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026380 for (uint32_t n = 1; n <= 8; n++) {
26381 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026382 GemmMicrokernelTester()
26383 .mr(4)
26384 .nr(8)
26385 .kr(1)
26386 .sr(1)
26387 .m(m)
26388 .n(n)
26389 .k(k)
26390 .iterations(1)
26391 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26392 }
26393 }
26394 }
26395 }
26396
26397 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
26398 for (uint32_t n = 9; n < 16; n++) {
26399 for (size_t k = 1; k <= 5; k += 2) {
26400 GemmMicrokernelTester()
26401 .mr(4)
26402 .nr(8)
26403 .kr(1)
26404 .sr(1)
26405 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026406 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026407 .k(k)
26408 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26409 }
26410 }
26411 }
26412
26413 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
26414 for (uint32_t n = 9; n < 16; n++) {
26415 for (size_t k = 1; k <= 5; k += 2) {
26416 GemmMicrokernelTester()
26417 .mr(4)
26418 .nr(8)
26419 .kr(1)
26420 .sr(1)
26421 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026422 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026423 .k(k)
26424 .cn_stride(11)
26425 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26426 }
26427 }
26428 }
26429
26430 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
26431 for (uint32_t n = 9; n < 16; n++) {
26432 for (size_t k = 1; k <= 5; k += 2) {
26433 for (uint32_t m = 1; m <= 4; m++) {
26434 GemmMicrokernelTester()
26435 .mr(4)
26436 .nr(8)
26437 .kr(1)
26438 .sr(1)
26439 .m(m)
26440 .n(n)
26441 .k(k)
26442 .iterations(1)
26443 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26444 }
26445 }
26446 }
26447 }
26448
26449 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
26450 for (uint32_t n = 16; n <= 24; n += 8) {
26451 for (size_t k = 1; k <= 5; k += 2) {
26452 GemmMicrokernelTester()
26453 .mr(4)
26454 .nr(8)
26455 .kr(1)
26456 .sr(1)
26457 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026458 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026459 .k(k)
26460 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26461 }
26462 }
26463 }
26464
26465 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
26466 for (uint32_t n = 16; n <= 24; n += 8) {
26467 for (size_t k = 1; k <= 5; k += 2) {
26468 GemmMicrokernelTester()
26469 .mr(4)
26470 .nr(8)
26471 .kr(1)
26472 .sr(1)
26473 .m(4)
26474 .n(n)
26475 .k(k)
26476 .cn_stride(11)
26477 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26478 }
26479 }
26480 }
26481
26482 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
26483 for (uint32_t n = 16; n <= 24; n += 8) {
26484 for (size_t k = 1; k <= 5; k += 2) {
26485 for (uint32_t m = 1; m <= 4; m++) {
26486 GemmMicrokernelTester()
26487 .mr(4)
26488 .nr(8)
26489 .kr(1)
26490 .sr(1)
26491 .m(m)
26492 .n(n)
26493 .k(k)
26494 .iterations(1)
26495 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26496 }
26497 }
26498 }
26499 }
26500
26501 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, small_kernel) {
26502 for (size_t k = 1; k <= 5; k += 2) {
26503 GemmMicrokernelTester()
26504 .mr(4)
26505 .nr(8)
26506 .kr(1)
26507 .sr(1)
26508 .m(4)
26509 .n(8)
26510 .k(k)
26511 .ks(3)
26512 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26513 }
26514 }
26515
26516 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, small_kernel_subtile) {
26517 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026518 for (uint32_t n = 1; n <= 8; n++) {
26519 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026520 GemmMicrokernelTester()
26521 .mr(4)
26522 .nr(8)
26523 .kr(1)
26524 .sr(1)
26525 .m(m)
26526 .n(n)
26527 .k(k)
26528 .ks(3)
26529 .iterations(1)
26530 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26531 }
26532 }
26533 }
26534 }
26535
26536 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_small_kernel) {
26537 for (uint32_t n = 9; n < 16; n++) {
26538 for (size_t k = 1; k <= 5; k += 2) {
26539 GemmMicrokernelTester()
26540 .mr(4)
26541 .nr(8)
26542 .kr(1)
26543 .sr(1)
26544 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026545 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026546 .k(k)
26547 .ks(3)
26548 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26549 }
26550 }
26551 }
26552
26553 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, n_div_8_small_kernel) {
26554 for (uint32_t n = 16; n <= 24; n += 8) {
26555 for (size_t k = 1; k <= 5; k += 2) {
26556 GemmMicrokernelTester()
26557 .mr(4)
26558 .nr(8)
26559 .kr(1)
26560 .sr(1)
26561 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026562 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026563 .k(k)
26564 .ks(3)
26565 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26566 }
26567 }
26568 }
26569
26570 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
26571 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026572 for (uint32_t n = 1; n <= 8; n++) {
26573 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026574 GemmMicrokernelTester()
26575 .mr(4)
26576 .nr(8)
26577 .kr(1)
26578 .sr(1)
26579 .m(m)
26580 .n(n)
26581 .k(k)
26582 .cm_stride(11)
26583 .iterations(1)
26584 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26585 }
26586 }
26587 }
26588 }
26589
26590 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, a_offset) {
26591 for (size_t k = 1; k <= 5; k += 2) {
26592 GemmMicrokernelTester()
26593 .mr(4)
26594 .nr(8)
26595 .kr(1)
26596 .sr(1)
26597 .m(4)
26598 .n(8)
26599 .k(k)
26600 .ks(3)
26601 .a_offset(23)
26602 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26603 }
26604 }
26605
26606 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026607 for (size_t k = 1; k <= 5; k += 2) {
26608 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026609 GemmMicrokernelTester()
26610 .mr(4)
26611 .nr(8)
26612 .kr(1)
26613 .sr(1)
26614 .m(4)
26615 .n(8)
26616 .k(k)
26617 .ks(3)
26618 .a_offset(23)
26619 .zero_index(mz)
26620 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26621 }
26622 }
26623 }
26624
26625 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, qmin) {
26626 GemmMicrokernelTester()
26627 .mr(4)
26628 .nr(8)
26629 .kr(1)
26630 .sr(1)
26631 .m(4)
26632 .n(8)
26633 .k(1)
26634 .qmin(128)
26635 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26636 }
26637
26638 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, qmax) {
26639 GemmMicrokernelTester()
26640 .mr(4)
26641 .nr(8)
26642 .kr(1)
26643 .sr(1)
26644 .m(4)
26645 .n(8)
26646 .k(1)
26647 .qmax(128)
26648 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26649 }
26650
26651 TEST(F32_IGEMM_MINMAX_4X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
26652 GemmMicrokernelTester()
26653 .mr(4)
26654 .nr(8)
26655 .kr(1)
26656 .sr(1)
26657 .m(4)
26658 .n(8)
26659 .k(1)
26660 .cm_stride(11)
26661 .Test(xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26662 }
26663#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26664
26665
26666#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
26667 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1) {
26668 GemmMicrokernelTester()
26669 .mr(6)
26670 .nr(8)
26671 .kr(1)
26672 .sr(1)
26673 .m(6)
26674 .n(8)
26675 .k(1)
26676 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26677 }
26678
26679 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cn) {
26680 GemmMicrokernelTester()
26681 .mr(6)
26682 .nr(8)
26683 .kr(1)
26684 .sr(1)
26685 .m(6)
26686 .n(8)
26687 .k(1)
26688 .cn_stride(11)
26689 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26690 }
26691
26692 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026693 for (uint32_t n = 1; n <= 8; n++) {
26694 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026695 GemmMicrokernelTester()
26696 .mr(6)
26697 .nr(8)
26698 .kr(1)
26699 .sr(1)
26700 .m(m)
26701 .n(n)
26702 .k(1)
26703 .iterations(1)
26704 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26705 }
26706 }
26707 }
26708
26709 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_m) {
26710 for (uint32_t m = 1; m <= 6; m++) {
26711 GemmMicrokernelTester()
26712 .mr(6)
26713 .nr(8)
26714 .kr(1)
26715 .sr(1)
26716 .m(m)
26717 .n(8)
26718 .k(1)
26719 .iterations(1)
26720 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26721 }
26722 }
26723
26724 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_eq_1_subtile_n) {
26725 for (uint32_t n = 1; n <= 8; n++) {
26726 GemmMicrokernelTester()
26727 .mr(6)
26728 .nr(8)
26729 .kr(1)
26730 .sr(1)
26731 .m(6)
26732 .n(n)
26733 .k(1)
26734 .iterations(1)
26735 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26736 }
26737 }
26738
26739 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1) {
26740 for (size_t k = 2; k < 10; k++) {
26741 GemmMicrokernelTester()
26742 .mr(6)
26743 .nr(8)
26744 .kr(1)
26745 .sr(1)
26746 .m(6)
26747 .n(8)
26748 .k(k)
26749 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26750 }
26751 }
26752
26753 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, k_gt_1_subtile) {
26754 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026755 for (uint32_t n = 1; n <= 8; n++) {
26756 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026757 GemmMicrokernelTester()
26758 .mr(6)
26759 .nr(8)
26760 .kr(1)
26761 .sr(1)
26762 .m(m)
26763 .n(n)
26764 .k(k)
26765 .iterations(1)
26766 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26767 }
26768 }
26769 }
26770 }
26771
26772 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8) {
26773 for (uint32_t n = 9; n < 16; n++) {
26774 for (size_t k = 1; k <= 5; k += 2) {
26775 GemmMicrokernelTester()
26776 .mr(6)
26777 .nr(8)
26778 .kr(1)
26779 .sr(1)
26780 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026781 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026782 .k(k)
26783 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26784 }
26785 }
26786 }
26787
26788 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_strided_cn) {
26789 for (uint32_t n = 9; n < 16; n++) {
26790 for (size_t k = 1; k <= 5; k += 2) {
26791 GemmMicrokernelTester()
26792 .mr(6)
26793 .nr(8)
26794 .kr(1)
26795 .sr(1)
26796 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026797 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026798 .k(k)
26799 .cn_stride(11)
26800 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26801 }
26802 }
26803 }
26804
26805 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_subtile) {
26806 for (uint32_t n = 9; n < 16; n++) {
26807 for (size_t k = 1; k <= 5; k += 2) {
26808 for (uint32_t m = 1; m <= 6; m++) {
26809 GemmMicrokernelTester()
26810 .mr(6)
26811 .nr(8)
26812 .kr(1)
26813 .sr(1)
26814 .m(m)
26815 .n(n)
26816 .k(k)
26817 .iterations(1)
26818 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26819 }
26820 }
26821 }
26822 }
26823
26824 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8) {
26825 for (uint32_t n = 16; n <= 24; n += 8) {
26826 for (size_t k = 1; k <= 5; k += 2) {
26827 GemmMicrokernelTester()
26828 .mr(6)
26829 .nr(8)
26830 .kr(1)
26831 .sr(1)
26832 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026833 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026834 .k(k)
26835 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26836 }
26837 }
26838 }
26839
26840 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_strided_cn) {
26841 for (uint32_t n = 16; n <= 24; n += 8) {
26842 for (size_t k = 1; k <= 5; k += 2) {
26843 GemmMicrokernelTester()
26844 .mr(6)
26845 .nr(8)
26846 .kr(1)
26847 .sr(1)
26848 .m(6)
26849 .n(n)
26850 .k(k)
26851 .cn_stride(11)
26852 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26853 }
26854 }
26855 }
26856
26857 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_subtile) {
26858 for (uint32_t n = 16; n <= 24; n += 8) {
26859 for (size_t k = 1; k <= 5; k += 2) {
26860 for (uint32_t m = 1; m <= 6; m++) {
26861 GemmMicrokernelTester()
26862 .mr(6)
26863 .nr(8)
26864 .kr(1)
26865 .sr(1)
26866 .m(m)
26867 .n(n)
26868 .k(k)
26869 .iterations(1)
26870 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26871 }
26872 }
26873 }
26874 }
26875
26876 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, small_kernel) {
26877 for (size_t k = 1; k <= 5; k += 2) {
26878 GemmMicrokernelTester()
26879 .mr(6)
26880 .nr(8)
26881 .kr(1)
26882 .sr(1)
26883 .m(6)
26884 .n(8)
26885 .k(k)
26886 .ks(3)
26887 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26888 }
26889 }
26890
26891 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, small_kernel_subtile) {
26892 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026893 for (uint32_t n = 1; n <= 8; n++) {
26894 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026895 GemmMicrokernelTester()
26896 .mr(6)
26897 .nr(8)
26898 .kr(1)
26899 .sr(1)
26900 .m(m)
26901 .n(n)
26902 .k(k)
26903 .ks(3)
26904 .iterations(1)
26905 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26906 }
26907 }
26908 }
26909 }
26910
26911 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_gt_8_small_kernel) {
26912 for (uint32_t n = 9; n < 16; n++) {
26913 for (size_t k = 1; k <= 5; k += 2) {
26914 GemmMicrokernelTester()
26915 .mr(6)
26916 .nr(8)
26917 .kr(1)
26918 .sr(1)
26919 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026920 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026921 .k(k)
26922 .ks(3)
26923 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26924 }
26925 }
26926 }
26927
26928 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, n_div_8_small_kernel) {
26929 for (uint32_t n = 16; n <= 24; n += 8) {
26930 for (size_t k = 1; k <= 5; k += 2) {
26931 GemmMicrokernelTester()
26932 .mr(6)
26933 .nr(8)
26934 .kr(1)
26935 .sr(1)
26936 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026937 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026938 .k(k)
26939 .ks(3)
26940 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26941 }
26942 }
26943 }
26944
26945 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cm_subtile) {
26946 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026947 for (uint32_t n = 1; n <= 8; n++) {
26948 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026949 GemmMicrokernelTester()
26950 .mr(6)
26951 .nr(8)
26952 .kr(1)
26953 .sr(1)
26954 .m(m)
26955 .n(n)
26956 .k(k)
26957 .cm_stride(11)
26958 .iterations(1)
26959 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26960 }
26961 }
26962 }
26963 }
26964
26965 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, a_offset) {
26966 for (size_t k = 1; k <= 5; k += 2) {
26967 GemmMicrokernelTester()
26968 .mr(6)
26969 .nr(8)
26970 .kr(1)
26971 .sr(1)
26972 .m(6)
26973 .n(8)
26974 .k(k)
26975 .ks(3)
26976 .a_offset(37)
26977 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26978 }
26979 }
26980
26981 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026982 for (size_t k = 1; k <= 5; k += 2) {
26983 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080026984 GemmMicrokernelTester()
26985 .mr(6)
26986 .nr(8)
26987 .kr(1)
26988 .sr(1)
26989 .m(6)
26990 .n(8)
26991 .k(k)
26992 .ks(3)
26993 .a_offset(37)
26994 .zero_index(mz)
26995 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
26996 }
26997 }
26998 }
26999
27000 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, qmin) {
27001 GemmMicrokernelTester()
27002 .mr(6)
27003 .nr(8)
27004 .kr(1)
27005 .sr(1)
27006 .m(6)
27007 .n(8)
27008 .k(1)
27009 .qmin(128)
27010 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
27011 }
27012
27013 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, qmax) {
27014 GemmMicrokernelTester()
27015 .mr(6)
27016 .nr(8)
27017 .kr(1)
27018 .sr(1)
27019 .m(6)
27020 .n(8)
27021 .k(1)
27022 .qmax(128)
27023 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
27024 }
27025
27026 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_LOADSPLAT, strided_cm) {
27027 GemmMicrokernelTester()
27028 .mr(6)
27029 .nr(8)
27030 .kr(1)
27031 .sr(1)
27032 .m(6)
27033 .n(8)
27034 .k(1)
27035 .cm_stride(11)
27036 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, xnn_init_f32_minmax_wasmsimd_params);
27037 }
27038#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27039
27040
27041#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27042 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
27043 GemmMicrokernelTester()
27044 .mr(1)
27045 .nr(8)
27046 .kr(1)
27047 .sr(1)
27048 .m(1)
27049 .n(8)
27050 .k(4)
27051 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27052 }
27053
27054 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cn) {
27055 GemmMicrokernelTester()
27056 .mr(1)
27057 .nr(8)
27058 .kr(1)
27059 .sr(1)
27060 .m(1)
27061 .n(8)
27062 .k(4)
27063 .cn_stride(11)
27064 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27065 }
27066
27067 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027068 for (uint32_t n = 1; n <= 8; n++) {
27069 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027070 GemmMicrokernelTester()
27071 .mr(1)
27072 .nr(8)
27073 .kr(1)
27074 .sr(1)
27075 .m(m)
27076 .n(n)
27077 .k(4)
27078 .iterations(1)
27079 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27080 }
27081 }
27082 }
27083
27084 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
27085 for (uint32_t m = 1; m <= 1; m++) {
27086 GemmMicrokernelTester()
27087 .mr(1)
27088 .nr(8)
27089 .kr(1)
27090 .sr(1)
27091 .m(m)
27092 .n(8)
27093 .k(4)
27094 .iterations(1)
27095 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27096 }
27097 }
27098
27099 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
27100 for (uint32_t n = 1; n <= 8; n++) {
27101 GemmMicrokernelTester()
27102 .mr(1)
27103 .nr(8)
27104 .kr(1)
27105 .sr(1)
27106 .m(1)
27107 .n(n)
27108 .k(4)
27109 .iterations(1)
27110 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27111 }
27112 }
27113
27114 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
27115 for (size_t k = 1; k < 4; k++) {
27116 GemmMicrokernelTester()
27117 .mr(1)
27118 .nr(8)
27119 .kr(1)
27120 .sr(1)
27121 .m(1)
27122 .n(8)
27123 .k(k)
27124 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27125 }
27126 }
27127
27128 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
27129 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027130 for (uint32_t n = 1; n <= 8; n++) {
27131 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027132 GemmMicrokernelTester()
27133 .mr(1)
27134 .nr(8)
27135 .kr(1)
27136 .sr(1)
27137 .m(m)
27138 .n(n)
27139 .k(k)
27140 .iterations(1)
27141 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27142 }
27143 }
27144 }
27145 }
27146
27147 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
27148 for (size_t k = 5; k < 8; k++) {
27149 GemmMicrokernelTester()
27150 .mr(1)
27151 .nr(8)
27152 .kr(1)
27153 .sr(1)
27154 .m(1)
27155 .n(8)
27156 .k(k)
27157 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27158 }
27159 }
27160
27161 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
27162 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027163 for (uint32_t n = 1; n <= 8; n++) {
27164 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027165 GemmMicrokernelTester()
27166 .mr(1)
27167 .nr(8)
27168 .kr(1)
27169 .sr(1)
27170 .m(m)
27171 .n(n)
27172 .k(k)
27173 .iterations(1)
27174 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27175 }
27176 }
27177 }
27178 }
27179
27180 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4) {
27181 for (size_t k = 8; k <= 40; k += 4) {
27182 GemmMicrokernelTester()
27183 .mr(1)
27184 .nr(8)
27185 .kr(1)
27186 .sr(1)
27187 .m(1)
27188 .n(8)
27189 .k(k)
27190 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27191 }
27192 }
27193
27194 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
27195 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027196 for (uint32_t n = 1; n <= 8; n++) {
27197 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027198 GemmMicrokernelTester()
27199 .mr(1)
27200 .nr(8)
27201 .kr(1)
27202 .sr(1)
27203 .m(m)
27204 .n(n)
27205 .k(k)
27206 .iterations(1)
27207 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27208 }
27209 }
27210 }
27211 }
27212
27213 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
27214 for (uint32_t n = 9; n < 16; n++) {
27215 for (size_t k = 1; k <= 20; k += 5) {
27216 GemmMicrokernelTester()
27217 .mr(1)
27218 .nr(8)
27219 .kr(1)
27220 .sr(1)
27221 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027222 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027223 .k(k)
27224 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27225 }
27226 }
27227 }
27228
27229 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
27230 for (uint32_t n = 9; n < 16; n++) {
27231 for (size_t k = 1; k <= 20; k += 5) {
27232 GemmMicrokernelTester()
27233 .mr(1)
27234 .nr(8)
27235 .kr(1)
27236 .sr(1)
27237 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027238 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027239 .k(k)
27240 .cn_stride(11)
27241 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27242 }
27243 }
27244 }
27245
27246 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
27247 for (uint32_t n = 9; n < 16; n++) {
27248 for (size_t k = 1; k <= 20; k += 5) {
27249 for (uint32_t m = 1; m <= 1; m++) {
27250 GemmMicrokernelTester()
27251 .mr(1)
27252 .nr(8)
27253 .kr(1)
27254 .sr(1)
27255 .m(m)
27256 .n(n)
27257 .k(k)
27258 .iterations(1)
27259 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27260 }
27261 }
27262 }
27263 }
27264
27265 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8) {
27266 for (uint32_t n = 16; n <= 24; n += 8) {
27267 for (size_t k = 1; k <= 20; k += 5) {
27268 GemmMicrokernelTester()
27269 .mr(1)
27270 .nr(8)
27271 .kr(1)
27272 .sr(1)
27273 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027274 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027275 .k(k)
27276 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27277 }
27278 }
27279 }
27280
27281 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
27282 for (uint32_t n = 16; n <= 24; n += 8) {
27283 for (size_t k = 1; k <= 20; k += 5) {
27284 GemmMicrokernelTester()
27285 .mr(1)
27286 .nr(8)
27287 .kr(1)
27288 .sr(1)
27289 .m(1)
27290 .n(n)
27291 .k(k)
27292 .cn_stride(11)
27293 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27294 }
27295 }
27296 }
27297
27298 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
27299 for (uint32_t n = 16; n <= 24; n += 8) {
27300 for (size_t k = 1; k <= 20; k += 5) {
27301 for (uint32_t m = 1; m <= 1; m++) {
27302 GemmMicrokernelTester()
27303 .mr(1)
27304 .nr(8)
27305 .kr(1)
27306 .sr(1)
27307 .m(m)
27308 .n(n)
27309 .k(k)
27310 .iterations(1)
27311 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27312 }
27313 }
27314 }
27315 }
27316
27317 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, small_kernel) {
27318 for (size_t k = 1; k <= 20; k += 5) {
27319 GemmMicrokernelTester()
27320 .mr(1)
27321 .nr(8)
27322 .kr(1)
27323 .sr(1)
27324 .m(1)
27325 .n(8)
27326 .k(k)
27327 .ks(3)
27328 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27329 }
27330 }
27331
27332 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, small_kernel_subtile) {
27333 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027334 for (uint32_t n = 1; n <= 8; n++) {
27335 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027336 GemmMicrokernelTester()
27337 .mr(1)
27338 .nr(8)
27339 .kr(1)
27340 .sr(1)
27341 .m(m)
27342 .n(n)
27343 .k(k)
27344 .ks(3)
27345 .iterations(1)
27346 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27347 }
27348 }
27349 }
27350 }
27351
27352 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_gt_8_small_kernel) {
27353 for (uint32_t n = 9; n < 16; n++) {
27354 for (size_t k = 1; k <= 20; k += 5) {
27355 GemmMicrokernelTester()
27356 .mr(1)
27357 .nr(8)
27358 .kr(1)
27359 .sr(1)
27360 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027361 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027362 .k(k)
27363 .ks(3)
27364 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27365 }
27366 }
27367 }
27368
27369 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, n_div_8_small_kernel) {
27370 for (uint32_t n = 16; n <= 24; n += 8) {
27371 for (size_t k = 1; k <= 20; k += 5) {
27372 GemmMicrokernelTester()
27373 .mr(1)
27374 .nr(8)
27375 .kr(1)
27376 .sr(1)
27377 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027378 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027379 .k(k)
27380 .ks(3)
27381 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27382 }
27383 }
27384 }
27385
27386 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
27387 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027388 for (uint32_t n = 1; n <= 8; n++) {
27389 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027390 GemmMicrokernelTester()
27391 .mr(1)
27392 .nr(8)
27393 .kr(1)
27394 .sr(1)
27395 .m(m)
27396 .n(n)
27397 .k(k)
27398 .cm_stride(11)
27399 .iterations(1)
27400 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27401 }
27402 }
27403 }
27404 }
27405
27406 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, a_offset) {
27407 for (size_t k = 1; k <= 20; k += 5) {
27408 GemmMicrokernelTester()
27409 .mr(1)
27410 .nr(8)
27411 .kr(1)
27412 .sr(1)
27413 .m(1)
27414 .n(8)
27415 .k(k)
27416 .ks(3)
27417 .a_offset(23)
27418 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27419 }
27420 }
27421
27422 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027423 for (size_t k = 1; k <= 20; k += 5) {
27424 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027425 GemmMicrokernelTester()
27426 .mr(1)
27427 .nr(8)
27428 .kr(1)
27429 .sr(1)
27430 .m(1)
27431 .n(8)
27432 .k(k)
27433 .ks(3)
27434 .a_offset(23)
27435 .zero_index(mz)
27436 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27437 }
27438 }
27439 }
27440
27441 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, qmin) {
27442 GemmMicrokernelTester()
27443 .mr(1)
27444 .nr(8)
27445 .kr(1)
27446 .sr(1)
27447 .m(1)
27448 .n(8)
27449 .k(4)
27450 .qmin(128)
27451 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27452 }
27453
27454 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, qmax) {
27455 GemmMicrokernelTester()
27456 .mr(1)
27457 .nr(8)
27458 .kr(1)
27459 .sr(1)
27460 .m(1)
27461 .n(8)
27462 .k(4)
27463 .qmax(128)
27464 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27465 }
27466
27467 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_ARM_SPLAT, strided_cm) {
27468 GemmMicrokernelTester()
27469 .mr(1)
27470 .nr(8)
27471 .kr(1)
27472 .sr(1)
27473 .m(1)
27474 .n(8)
27475 .k(4)
27476 .cm_stride(11)
27477 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27478 }
27479#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27480
27481
27482#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27483 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4) {
27484 GemmMicrokernelTester()
27485 .mr(6)
27486 .nr(8)
27487 .kr(1)
27488 .sr(1)
27489 .m(6)
27490 .n(8)
27491 .k(4)
27492 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27493 }
27494
27495 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cn) {
27496 GemmMicrokernelTester()
27497 .mr(6)
27498 .nr(8)
27499 .kr(1)
27500 .sr(1)
27501 .m(6)
27502 .n(8)
27503 .k(4)
27504 .cn_stride(11)
27505 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27506 }
27507
27508 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027509 for (uint32_t n = 1; n <= 8; n++) {
27510 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027511 GemmMicrokernelTester()
27512 .mr(6)
27513 .nr(8)
27514 .kr(1)
27515 .sr(1)
27516 .m(m)
27517 .n(n)
27518 .k(4)
27519 .iterations(1)
27520 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27521 }
27522 }
27523 }
27524
27525 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_m) {
27526 for (uint32_t m = 1; m <= 6; m++) {
27527 GemmMicrokernelTester()
27528 .mr(6)
27529 .nr(8)
27530 .kr(1)
27531 .sr(1)
27532 .m(m)
27533 .n(8)
27534 .k(4)
27535 .iterations(1)
27536 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27537 }
27538 }
27539
27540 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_eq_4_subtile_n) {
27541 for (uint32_t n = 1; n <= 8; n++) {
27542 GemmMicrokernelTester()
27543 .mr(6)
27544 .nr(8)
27545 .kr(1)
27546 .sr(1)
27547 .m(6)
27548 .n(n)
27549 .k(4)
27550 .iterations(1)
27551 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27552 }
27553 }
27554
27555 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4) {
27556 for (size_t k = 1; k < 4; k++) {
27557 GemmMicrokernelTester()
27558 .mr(6)
27559 .nr(8)
27560 .kr(1)
27561 .sr(1)
27562 .m(6)
27563 .n(8)
27564 .k(k)
27565 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27566 }
27567 }
27568
27569 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_lt_4_subtile) {
27570 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027571 for (uint32_t n = 1; n <= 8; n++) {
27572 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027573 GemmMicrokernelTester()
27574 .mr(6)
27575 .nr(8)
27576 .kr(1)
27577 .sr(1)
27578 .m(m)
27579 .n(n)
27580 .k(k)
27581 .iterations(1)
27582 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27583 }
27584 }
27585 }
27586 }
27587
27588 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4) {
27589 for (size_t k = 5; k < 8; k++) {
27590 GemmMicrokernelTester()
27591 .mr(6)
27592 .nr(8)
27593 .kr(1)
27594 .sr(1)
27595 .m(6)
27596 .n(8)
27597 .k(k)
27598 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27599 }
27600 }
27601
27602 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_gt_4_subtile) {
27603 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027604 for (uint32_t n = 1; n <= 8; n++) {
27605 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027606 GemmMicrokernelTester()
27607 .mr(6)
27608 .nr(8)
27609 .kr(1)
27610 .sr(1)
27611 .m(m)
27612 .n(n)
27613 .k(k)
27614 .iterations(1)
27615 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27616 }
27617 }
27618 }
27619 }
27620
27621 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4) {
27622 for (size_t k = 8; k <= 40; k += 4) {
27623 GemmMicrokernelTester()
27624 .mr(6)
27625 .nr(8)
27626 .kr(1)
27627 .sr(1)
27628 .m(6)
27629 .n(8)
27630 .k(k)
27631 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27632 }
27633 }
27634
27635 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, k_div_4_subtile) {
27636 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027637 for (uint32_t n = 1; n <= 8; n++) {
27638 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027639 GemmMicrokernelTester()
27640 .mr(6)
27641 .nr(8)
27642 .kr(1)
27643 .sr(1)
27644 .m(m)
27645 .n(n)
27646 .k(k)
27647 .iterations(1)
27648 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27649 }
27650 }
27651 }
27652 }
27653
27654 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
27655 for (uint32_t n = 9; n < 16; n++) {
27656 for (size_t k = 1; k <= 20; k += 5) {
27657 GemmMicrokernelTester()
27658 .mr(6)
27659 .nr(8)
27660 .kr(1)
27661 .sr(1)
27662 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027663 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027664 .k(k)
27665 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27666 }
27667 }
27668 }
27669
27670 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
27671 for (uint32_t n = 9; n < 16; n++) {
27672 for (size_t k = 1; k <= 20; k += 5) {
27673 GemmMicrokernelTester()
27674 .mr(6)
27675 .nr(8)
27676 .kr(1)
27677 .sr(1)
27678 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027679 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027680 .k(k)
27681 .cn_stride(11)
27682 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27683 }
27684 }
27685 }
27686
27687 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
27688 for (uint32_t n = 9; n < 16; n++) {
27689 for (size_t k = 1; k <= 20; k += 5) {
27690 for (uint32_t m = 1; m <= 6; m++) {
27691 GemmMicrokernelTester()
27692 .mr(6)
27693 .nr(8)
27694 .kr(1)
27695 .sr(1)
27696 .m(m)
27697 .n(n)
27698 .k(k)
27699 .iterations(1)
27700 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27701 }
27702 }
27703 }
27704 }
27705
27706 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8) {
27707 for (uint32_t n = 16; n <= 24; n += 8) {
27708 for (size_t k = 1; k <= 20; k += 5) {
27709 GemmMicrokernelTester()
27710 .mr(6)
27711 .nr(8)
27712 .kr(1)
27713 .sr(1)
27714 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027715 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027716 .k(k)
27717 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27718 }
27719 }
27720 }
27721
27722 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
27723 for (uint32_t n = 16; n <= 24; n += 8) {
27724 for (size_t k = 1; k <= 20; k += 5) {
27725 GemmMicrokernelTester()
27726 .mr(6)
27727 .nr(8)
27728 .kr(1)
27729 .sr(1)
27730 .m(6)
27731 .n(n)
27732 .k(k)
27733 .cn_stride(11)
27734 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27735 }
27736 }
27737 }
27738
27739 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
27740 for (uint32_t n = 16; n <= 24; n += 8) {
27741 for (size_t k = 1; k <= 20; k += 5) {
27742 for (uint32_t m = 1; m <= 6; m++) {
27743 GemmMicrokernelTester()
27744 .mr(6)
27745 .nr(8)
27746 .kr(1)
27747 .sr(1)
27748 .m(m)
27749 .n(n)
27750 .k(k)
27751 .iterations(1)
27752 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27753 }
27754 }
27755 }
27756 }
27757
27758 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, small_kernel) {
27759 for (size_t k = 1; k <= 20; k += 5) {
27760 GemmMicrokernelTester()
27761 .mr(6)
27762 .nr(8)
27763 .kr(1)
27764 .sr(1)
27765 .m(6)
27766 .n(8)
27767 .k(k)
27768 .ks(3)
27769 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27770 }
27771 }
27772
27773 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, small_kernel_subtile) {
27774 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027775 for (uint32_t n = 1; n <= 8; n++) {
27776 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027777 GemmMicrokernelTester()
27778 .mr(6)
27779 .nr(8)
27780 .kr(1)
27781 .sr(1)
27782 .m(m)
27783 .n(n)
27784 .k(k)
27785 .ks(3)
27786 .iterations(1)
27787 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27788 }
27789 }
27790 }
27791 }
27792
27793 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_gt_8_small_kernel) {
27794 for (uint32_t n = 9; n < 16; n++) {
27795 for (size_t k = 1; k <= 20; k += 5) {
27796 GemmMicrokernelTester()
27797 .mr(6)
27798 .nr(8)
27799 .kr(1)
27800 .sr(1)
27801 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027802 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027803 .k(k)
27804 .ks(3)
27805 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27806 }
27807 }
27808 }
27809
27810 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, n_div_8_small_kernel) {
27811 for (uint32_t n = 16; n <= 24; n += 8) {
27812 for (size_t k = 1; k <= 20; k += 5) {
27813 GemmMicrokernelTester()
27814 .mr(6)
27815 .nr(8)
27816 .kr(1)
27817 .sr(1)
27818 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027819 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027820 .k(k)
27821 .ks(3)
27822 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27823 }
27824 }
27825 }
27826
27827 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
27828 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027829 for (uint32_t n = 1; n <= 8; n++) {
27830 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027831 GemmMicrokernelTester()
27832 .mr(6)
27833 .nr(8)
27834 .kr(1)
27835 .sr(1)
27836 .m(m)
27837 .n(n)
27838 .k(k)
27839 .cm_stride(11)
27840 .iterations(1)
27841 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27842 }
27843 }
27844 }
27845 }
27846
27847 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, a_offset) {
27848 for (size_t k = 1; k <= 20; k += 5) {
27849 GemmMicrokernelTester()
27850 .mr(6)
27851 .nr(8)
27852 .kr(1)
27853 .sr(1)
27854 .m(6)
27855 .n(8)
27856 .k(k)
27857 .ks(3)
27858 .a_offset(127)
27859 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27860 }
27861 }
27862
27863 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027864 for (size_t k = 1; k <= 20; k += 5) {
27865 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027866 GemmMicrokernelTester()
27867 .mr(6)
27868 .nr(8)
27869 .kr(1)
27870 .sr(1)
27871 .m(6)
27872 .n(8)
27873 .k(k)
27874 .ks(3)
27875 .a_offset(127)
27876 .zero_index(mz)
27877 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27878 }
27879 }
27880 }
27881
27882 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, qmin) {
27883 GemmMicrokernelTester()
27884 .mr(6)
27885 .nr(8)
27886 .kr(1)
27887 .sr(1)
27888 .m(6)
27889 .n(8)
27890 .k(4)
27891 .qmin(128)
27892 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27893 }
27894
27895 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, qmax) {
27896 GemmMicrokernelTester()
27897 .mr(6)
27898 .nr(8)
27899 .kr(1)
27900 .sr(1)
27901 .m(6)
27902 .n(8)
27903 .k(4)
27904 .qmax(128)
27905 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27906 }
27907
27908 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_ARM_SPLAT, strided_cm) {
27909 GemmMicrokernelTester()
27910 .mr(6)
27911 .nr(8)
27912 .kr(1)
27913 .sr(1)
27914 .m(6)
27915 .n(8)
27916 .k(4)
27917 .cm_stride(11)
27918 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, xnn_init_f32_minmax_wasmsimd_params);
27919 }
27920#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27921
27922
27923#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
27924 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4) {
27925 GemmMicrokernelTester()
27926 .mr(1)
27927 .nr(8)
27928 .kr(1)
27929 .sr(1)
27930 .m(1)
27931 .n(8)
27932 .k(4)
27933 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
27934 }
27935
27936 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cn) {
27937 GemmMicrokernelTester()
27938 .mr(1)
27939 .nr(8)
27940 .kr(1)
27941 .sr(1)
27942 .m(1)
27943 .n(8)
27944 .k(4)
27945 .cn_stride(11)
27946 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
27947 }
27948
27949 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027950 for (uint32_t n = 1; n <= 8; n++) {
27951 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080027952 GemmMicrokernelTester()
27953 .mr(1)
27954 .nr(8)
27955 .kr(1)
27956 .sr(1)
27957 .m(m)
27958 .n(n)
27959 .k(4)
27960 .iterations(1)
27961 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
27962 }
27963 }
27964 }
27965
27966 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
27967 for (uint32_t m = 1; m <= 1; m++) {
27968 GemmMicrokernelTester()
27969 .mr(1)
27970 .nr(8)
27971 .kr(1)
27972 .sr(1)
27973 .m(m)
27974 .n(8)
27975 .k(4)
27976 .iterations(1)
27977 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
27978 }
27979 }
27980
27981 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
27982 for (uint32_t n = 1; n <= 8; n++) {
27983 GemmMicrokernelTester()
27984 .mr(1)
27985 .nr(8)
27986 .kr(1)
27987 .sr(1)
27988 .m(1)
27989 .n(n)
27990 .k(4)
27991 .iterations(1)
27992 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
27993 }
27994 }
27995
27996 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4) {
27997 for (size_t k = 1; k < 4; k++) {
27998 GemmMicrokernelTester()
27999 .mr(1)
28000 .nr(8)
28001 .kr(1)
28002 .sr(1)
28003 .m(1)
28004 .n(8)
28005 .k(k)
28006 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28007 }
28008 }
28009
28010 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
28011 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028012 for (uint32_t n = 1; n <= 8; n++) {
28013 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028014 GemmMicrokernelTester()
28015 .mr(1)
28016 .nr(8)
28017 .kr(1)
28018 .sr(1)
28019 .m(m)
28020 .n(n)
28021 .k(k)
28022 .iterations(1)
28023 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28024 }
28025 }
28026 }
28027 }
28028
28029 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4) {
28030 for (size_t k = 5; k < 8; k++) {
28031 GemmMicrokernelTester()
28032 .mr(1)
28033 .nr(8)
28034 .kr(1)
28035 .sr(1)
28036 .m(1)
28037 .n(8)
28038 .k(k)
28039 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28040 }
28041 }
28042
28043 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
28044 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028045 for (uint32_t n = 1; n <= 8; n++) {
28046 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028047 GemmMicrokernelTester()
28048 .mr(1)
28049 .nr(8)
28050 .kr(1)
28051 .sr(1)
28052 .m(m)
28053 .n(n)
28054 .k(k)
28055 .iterations(1)
28056 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28057 }
28058 }
28059 }
28060 }
28061
28062 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4) {
28063 for (size_t k = 8; k <= 40; k += 4) {
28064 GemmMicrokernelTester()
28065 .mr(1)
28066 .nr(8)
28067 .kr(1)
28068 .sr(1)
28069 .m(1)
28070 .n(8)
28071 .k(k)
28072 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28073 }
28074 }
28075
28076 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
28077 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028078 for (uint32_t n = 1; n <= 8; n++) {
28079 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028080 GemmMicrokernelTester()
28081 .mr(1)
28082 .nr(8)
28083 .kr(1)
28084 .sr(1)
28085 .m(m)
28086 .n(n)
28087 .k(k)
28088 .iterations(1)
28089 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28090 }
28091 }
28092 }
28093 }
28094
28095 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8) {
28096 for (uint32_t n = 9; n < 16; n++) {
28097 for (size_t k = 1; k <= 20; k += 5) {
28098 GemmMicrokernelTester()
28099 .mr(1)
28100 .nr(8)
28101 .kr(1)
28102 .sr(1)
28103 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028104 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028105 .k(k)
28106 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28107 }
28108 }
28109 }
28110
28111 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
28112 for (uint32_t n = 9; n < 16; n++) {
28113 for (size_t k = 1; k <= 20; k += 5) {
28114 GemmMicrokernelTester()
28115 .mr(1)
28116 .nr(8)
28117 .kr(1)
28118 .sr(1)
28119 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028120 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028121 .k(k)
28122 .cn_stride(11)
28123 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28124 }
28125 }
28126 }
28127
28128 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
28129 for (uint32_t n = 9; n < 16; n++) {
28130 for (size_t k = 1; k <= 20; k += 5) {
28131 for (uint32_t m = 1; m <= 1; m++) {
28132 GemmMicrokernelTester()
28133 .mr(1)
28134 .nr(8)
28135 .kr(1)
28136 .sr(1)
28137 .m(m)
28138 .n(n)
28139 .k(k)
28140 .iterations(1)
28141 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28142 }
28143 }
28144 }
28145 }
28146
28147 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8) {
28148 for (uint32_t n = 16; n <= 24; n += 8) {
28149 for (size_t k = 1; k <= 20; k += 5) {
28150 GemmMicrokernelTester()
28151 .mr(1)
28152 .nr(8)
28153 .kr(1)
28154 .sr(1)
28155 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028156 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028157 .k(k)
28158 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28159 }
28160 }
28161 }
28162
28163 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
28164 for (uint32_t n = 16; n <= 24; n += 8) {
28165 for (size_t k = 1; k <= 20; k += 5) {
28166 GemmMicrokernelTester()
28167 .mr(1)
28168 .nr(8)
28169 .kr(1)
28170 .sr(1)
28171 .m(1)
28172 .n(n)
28173 .k(k)
28174 .cn_stride(11)
28175 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28176 }
28177 }
28178 }
28179
28180 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
28181 for (uint32_t n = 16; n <= 24; n += 8) {
28182 for (size_t k = 1; k <= 20; k += 5) {
28183 for (uint32_t m = 1; m <= 1; m++) {
28184 GemmMicrokernelTester()
28185 .mr(1)
28186 .nr(8)
28187 .kr(1)
28188 .sr(1)
28189 .m(m)
28190 .n(n)
28191 .k(k)
28192 .iterations(1)
28193 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28194 }
28195 }
28196 }
28197 }
28198
28199 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, small_kernel) {
28200 for (size_t k = 1; k <= 20; k += 5) {
28201 GemmMicrokernelTester()
28202 .mr(1)
28203 .nr(8)
28204 .kr(1)
28205 .sr(1)
28206 .m(1)
28207 .n(8)
28208 .k(k)
28209 .ks(3)
28210 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28211 }
28212 }
28213
28214 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, small_kernel_subtile) {
28215 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028216 for (uint32_t n = 1; n <= 8; n++) {
28217 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028218 GemmMicrokernelTester()
28219 .mr(1)
28220 .nr(8)
28221 .kr(1)
28222 .sr(1)
28223 .m(m)
28224 .n(n)
28225 .k(k)
28226 .ks(3)
28227 .iterations(1)
28228 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28229 }
28230 }
28231 }
28232 }
28233
28234 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_gt_8_small_kernel) {
28235 for (uint32_t n = 9; n < 16; n++) {
28236 for (size_t k = 1; k <= 20; k += 5) {
28237 GemmMicrokernelTester()
28238 .mr(1)
28239 .nr(8)
28240 .kr(1)
28241 .sr(1)
28242 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028243 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028244 .k(k)
28245 .ks(3)
28246 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28247 }
28248 }
28249 }
28250
28251 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, n_div_8_small_kernel) {
28252 for (uint32_t n = 16; n <= 24; n += 8) {
28253 for (size_t k = 1; k <= 20; k += 5) {
28254 GemmMicrokernelTester()
28255 .mr(1)
28256 .nr(8)
28257 .kr(1)
28258 .sr(1)
28259 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028260 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028261 .k(k)
28262 .ks(3)
28263 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28264 }
28265 }
28266 }
28267
28268 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
28269 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028270 for (uint32_t n = 1; n <= 8; n++) {
28271 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028272 GemmMicrokernelTester()
28273 .mr(1)
28274 .nr(8)
28275 .kr(1)
28276 .sr(1)
28277 .m(m)
28278 .n(n)
28279 .k(k)
28280 .cm_stride(11)
28281 .iterations(1)
28282 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28283 }
28284 }
28285 }
28286 }
28287
28288 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, a_offset) {
28289 for (size_t k = 1; k <= 20; k += 5) {
28290 GemmMicrokernelTester()
28291 .mr(1)
28292 .nr(8)
28293 .kr(1)
28294 .sr(1)
28295 .m(1)
28296 .n(8)
28297 .k(k)
28298 .ks(3)
28299 .a_offset(23)
28300 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28301 }
28302 }
28303
28304 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028305 for (size_t k = 1; k <= 20; k += 5) {
28306 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028307 GemmMicrokernelTester()
28308 .mr(1)
28309 .nr(8)
28310 .kr(1)
28311 .sr(1)
28312 .m(1)
28313 .n(8)
28314 .k(k)
28315 .ks(3)
28316 .a_offset(23)
28317 .zero_index(mz)
28318 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28319 }
28320 }
28321 }
28322
28323 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, qmin) {
28324 GemmMicrokernelTester()
28325 .mr(1)
28326 .nr(8)
28327 .kr(1)
28328 .sr(1)
28329 .m(1)
28330 .n(8)
28331 .k(4)
28332 .qmin(128)
28333 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28334 }
28335
28336 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, qmax) {
28337 GemmMicrokernelTester()
28338 .mr(1)
28339 .nr(8)
28340 .kr(1)
28341 .sr(1)
28342 .m(1)
28343 .n(8)
28344 .k(4)
28345 .qmax(128)
28346 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28347 }
28348
28349 TEST(F32_IGEMM_MINMAX_1X8__WASMSIMD_X86_SPLAT, strided_cm) {
28350 GemmMicrokernelTester()
28351 .mr(1)
28352 .nr(8)
28353 .kr(1)
28354 .sr(1)
28355 .m(1)
28356 .n(8)
28357 .k(4)
28358 .cm_stride(11)
28359 .Test(xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28360 }
28361#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28362
28363
28364#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28365 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4) {
28366 GemmMicrokernelTester()
28367 .mr(6)
28368 .nr(8)
28369 .kr(1)
28370 .sr(1)
28371 .m(6)
28372 .n(8)
28373 .k(4)
28374 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28375 }
28376
28377 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cn) {
28378 GemmMicrokernelTester()
28379 .mr(6)
28380 .nr(8)
28381 .kr(1)
28382 .sr(1)
28383 .m(6)
28384 .n(8)
28385 .k(4)
28386 .cn_stride(11)
28387 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28388 }
28389
28390 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028391 for (uint32_t n = 1; n <= 8; n++) {
28392 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028393 GemmMicrokernelTester()
28394 .mr(6)
28395 .nr(8)
28396 .kr(1)
28397 .sr(1)
28398 .m(m)
28399 .n(n)
28400 .k(4)
28401 .iterations(1)
28402 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28403 }
28404 }
28405 }
28406
28407 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_m) {
28408 for (uint32_t m = 1; m <= 6; m++) {
28409 GemmMicrokernelTester()
28410 .mr(6)
28411 .nr(8)
28412 .kr(1)
28413 .sr(1)
28414 .m(m)
28415 .n(8)
28416 .k(4)
28417 .iterations(1)
28418 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28419 }
28420 }
28421
28422 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_eq_4_subtile_n) {
28423 for (uint32_t n = 1; n <= 8; n++) {
28424 GemmMicrokernelTester()
28425 .mr(6)
28426 .nr(8)
28427 .kr(1)
28428 .sr(1)
28429 .m(6)
28430 .n(n)
28431 .k(4)
28432 .iterations(1)
28433 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28434 }
28435 }
28436
28437 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4) {
28438 for (size_t k = 1; k < 4; k++) {
28439 GemmMicrokernelTester()
28440 .mr(6)
28441 .nr(8)
28442 .kr(1)
28443 .sr(1)
28444 .m(6)
28445 .n(8)
28446 .k(k)
28447 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28448 }
28449 }
28450
28451 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_lt_4_subtile) {
28452 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028453 for (uint32_t n = 1; n <= 8; n++) {
28454 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028455 GemmMicrokernelTester()
28456 .mr(6)
28457 .nr(8)
28458 .kr(1)
28459 .sr(1)
28460 .m(m)
28461 .n(n)
28462 .k(k)
28463 .iterations(1)
28464 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28465 }
28466 }
28467 }
28468 }
28469
28470 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4) {
28471 for (size_t k = 5; k < 8; k++) {
28472 GemmMicrokernelTester()
28473 .mr(6)
28474 .nr(8)
28475 .kr(1)
28476 .sr(1)
28477 .m(6)
28478 .n(8)
28479 .k(k)
28480 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28481 }
28482 }
28483
28484 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_gt_4_subtile) {
28485 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028486 for (uint32_t n = 1; n <= 8; n++) {
28487 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028488 GemmMicrokernelTester()
28489 .mr(6)
28490 .nr(8)
28491 .kr(1)
28492 .sr(1)
28493 .m(m)
28494 .n(n)
28495 .k(k)
28496 .iterations(1)
28497 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28498 }
28499 }
28500 }
28501 }
28502
28503 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4) {
28504 for (size_t k = 8; k <= 40; k += 4) {
28505 GemmMicrokernelTester()
28506 .mr(6)
28507 .nr(8)
28508 .kr(1)
28509 .sr(1)
28510 .m(6)
28511 .n(8)
28512 .k(k)
28513 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28514 }
28515 }
28516
28517 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, k_div_4_subtile) {
28518 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028519 for (uint32_t n = 1; n <= 8; n++) {
28520 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028521 GemmMicrokernelTester()
28522 .mr(6)
28523 .nr(8)
28524 .kr(1)
28525 .sr(1)
28526 .m(m)
28527 .n(n)
28528 .k(k)
28529 .iterations(1)
28530 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28531 }
28532 }
28533 }
28534 }
28535
28536 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8) {
28537 for (uint32_t n = 9; n < 16; n++) {
28538 for (size_t k = 1; k <= 20; k += 5) {
28539 GemmMicrokernelTester()
28540 .mr(6)
28541 .nr(8)
28542 .kr(1)
28543 .sr(1)
28544 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028545 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028546 .k(k)
28547 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28548 }
28549 }
28550 }
28551
28552 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
28553 for (uint32_t n = 9; n < 16; n++) {
28554 for (size_t k = 1; k <= 20; k += 5) {
28555 GemmMicrokernelTester()
28556 .mr(6)
28557 .nr(8)
28558 .kr(1)
28559 .sr(1)
28560 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028561 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028562 .k(k)
28563 .cn_stride(11)
28564 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28565 }
28566 }
28567 }
28568
28569 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
28570 for (uint32_t n = 9; n < 16; n++) {
28571 for (size_t k = 1; k <= 20; k += 5) {
28572 for (uint32_t m = 1; m <= 6; m++) {
28573 GemmMicrokernelTester()
28574 .mr(6)
28575 .nr(8)
28576 .kr(1)
28577 .sr(1)
28578 .m(m)
28579 .n(n)
28580 .k(k)
28581 .iterations(1)
28582 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28583 }
28584 }
28585 }
28586 }
28587
28588 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8) {
28589 for (uint32_t n = 16; n <= 24; n += 8) {
28590 for (size_t k = 1; k <= 20; k += 5) {
28591 GemmMicrokernelTester()
28592 .mr(6)
28593 .nr(8)
28594 .kr(1)
28595 .sr(1)
28596 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028597 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028598 .k(k)
28599 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28600 }
28601 }
28602 }
28603
28604 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
28605 for (uint32_t n = 16; n <= 24; n += 8) {
28606 for (size_t k = 1; k <= 20; k += 5) {
28607 GemmMicrokernelTester()
28608 .mr(6)
28609 .nr(8)
28610 .kr(1)
28611 .sr(1)
28612 .m(6)
28613 .n(n)
28614 .k(k)
28615 .cn_stride(11)
28616 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28617 }
28618 }
28619 }
28620
28621 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
28622 for (uint32_t n = 16; n <= 24; n += 8) {
28623 for (size_t k = 1; k <= 20; k += 5) {
28624 for (uint32_t m = 1; m <= 6; m++) {
28625 GemmMicrokernelTester()
28626 .mr(6)
28627 .nr(8)
28628 .kr(1)
28629 .sr(1)
28630 .m(m)
28631 .n(n)
28632 .k(k)
28633 .iterations(1)
28634 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28635 }
28636 }
28637 }
28638 }
28639
28640 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, small_kernel) {
28641 for (size_t k = 1; k <= 20; k += 5) {
28642 GemmMicrokernelTester()
28643 .mr(6)
28644 .nr(8)
28645 .kr(1)
28646 .sr(1)
28647 .m(6)
28648 .n(8)
28649 .k(k)
28650 .ks(3)
28651 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28652 }
28653 }
28654
28655 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, small_kernel_subtile) {
28656 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028657 for (uint32_t n = 1; n <= 8; n++) {
28658 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028659 GemmMicrokernelTester()
28660 .mr(6)
28661 .nr(8)
28662 .kr(1)
28663 .sr(1)
28664 .m(m)
28665 .n(n)
28666 .k(k)
28667 .ks(3)
28668 .iterations(1)
28669 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28670 }
28671 }
28672 }
28673 }
28674
28675 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_gt_8_small_kernel) {
28676 for (uint32_t n = 9; n < 16; n++) {
28677 for (size_t k = 1; k <= 20; k += 5) {
28678 GemmMicrokernelTester()
28679 .mr(6)
28680 .nr(8)
28681 .kr(1)
28682 .sr(1)
28683 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028684 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028685 .k(k)
28686 .ks(3)
28687 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28688 }
28689 }
28690 }
28691
28692 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, n_div_8_small_kernel) {
28693 for (uint32_t n = 16; n <= 24; n += 8) {
28694 for (size_t k = 1; k <= 20; k += 5) {
28695 GemmMicrokernelTester()
28696 .mr(6)
28697 .nr(8)
28698 .kr(1)
28699 .sr(1)
28700 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028701 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028702 .k(k)
28703 .ks(3)
28704 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28705 }
28706 }
28707 }
28708
28709 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
28710 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028711 for (uint32_t n = 1; n <= 8; n++) {
28712 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028713 GemmMicrokernelTester()
28714 .mr(6)
28715 .nr(8)
28716 .kr(1)
28717 .sr(1)
28718 .m(m)
28719 .n(n)
28720 .k(k)
28721 .cm_stride(11)
28722 .iterations(1)
28723 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28724 }
28725 }
28726 }
28727 }
28728
28729 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, a_offset) {
28730 for (size_t k = 1; k <= 20; k += 5) {
28731 GemmMicrokernelTester()
28732 .mr(6)
28733 .nr(8)
28734 .kr(1)
28735 .sr(1)
28736 .m(6)
28737 .n(8)
28738 .k(k)
28739 .ks(3)
28740 .a_offset(127)
28741 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28742 }
28743 }
28744
28745 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028746 for (size_t k = 1; k <= 20; k += 5) {
28747 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028748 GemmMicrokernelTester()
28749 .mr(6)
28750 .nr(8)
28751 .kr(1)
28752 .sr(1)
28753 .m(6)
28754 .n(8)
28755 .k(k)
28756 .ks(3)
28757 .a_offset(127)
28758 .zero_index(mz)
28759 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28760 }
28761 }
28762 }
28763
28764 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, qmin) {
28765 GemmMicrokernelTester()
28766 .mr(6)
28767 .nr(8)
28768 .kr(1)
28769 .sr(1)
28770 .m(6)
28771 .n(8)
28772 .k(4)
28773 .qmin(128)
28774 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28775 }
28776
28777 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, qmax) {
28778 GemmMicrokernelTester()
28779 .mr(6)
28780 .nr(8)
28781 .kr(1)
28782 .sr(1)
28783 .m(6)
28784 .n(8)
28785 .k(4)
28786 .qmax(128)
28787 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28788 }
28789
28790 TEST(F32_IGEMM_MINMAX_6X8__WASMSIMD_X86_SPLAT, strided_cm) {
28791 GemmMicrokernelTester()
28792 .mr(6)
28793 .nr(8)
28794 .kr(1)
28795 .sr(1)
28796 .m(6)
28797 .n(8)
28798 .k(4)
28799 .cm_stride(11)
28800 .Test(xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, xnn_init_f32_minmax_wasmsimd_params);
28801 }
28802#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28803
28804
28805#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28806 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4) {
28807 GemmMicrokernelTester()
28808 .mr(3)
28809 .nr(8)
28810 .kr(1)
28811 .sr(4)
28812 .m(3)
28813 .n(8)
28814 .k(4)
28815 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28816 }
28817
28818 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, strided_cn) {
28819 GemmMicrokernelTester()
28820 .mr(3)
28821 .nr(8)
28822 .kr(1)
28823 .sr(4)
28824 .m(3)
28825 .n(8)
28826 .k(4)
28827 .cn_stride(11)
28828 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28829 }
28830
28831 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028832 for (uint32_t n = 1; n <= 8; n++) {
28833 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028834 GemmMicrokernelTester()
28835 .mr(3)
28836 .nr(8)
28837 .kr(1)
28838 .sr(4)
28839 .m(m)
28840 .n(n)
28841 .k(4)
28842 .iterations(1)
28843 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28844 }
28845 }
28846 }
28847
28848 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
28849 for (uint32_t m = 1; m <= 3; m++) {
28850 GemmMicrokernelTester()
28851 .mr(3)
28852 .nr(8)
28853 .kr(1)
28854 .sr(4)
28855 .m(m)
28856 .n(8)
28857 .k(4)
28858 .iterations(1)
28859 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28860 }
28861 }
28862
28863 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
28864 for (uint32_t n = 1; n <= 8; n++) {
28865 GemmMicrokernelTester()
28866 .mr(3)
28867 .nr(8)
28868 .kr(1)
28869 .sr(4)
28870 .m(3)
28871 .n(n)
28872 .k(4)
28873 .iterations(1)
28874 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28875 }
28876 }
28877
28878 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4) {
28879 for (size_t k = 1; k < 4; k++) {
28880 GemmMicrokernelTester()
28881 .mr(3)
28882 .nr(8)
28883 .kr(1)
28884 .sr(4)
28885 .m(3)
28886 .n(8)
28887 .k(k)
28888 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28889 }
28890 }
28891
28892 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
28893 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028894 for (uint32_t n = 1; n <= 8; n++) {
28895 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028896 GemmMicrokernelTester()
28897 .mr(3)
28898 .nr(8)
28899 .kr(1)
28900 .sr(4)
28901 .m(m)
28902 .n(n)
28903 .k(k)
28904 .iterations(1)
28905 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28906 }
28907 }
28908 }
28909 }
28910
28911 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4) {
28912 for (size_t k = 5; k < 8; k++) {
28913 GemmMicrokernelTester()
28914 .mr(3)
28915 .nr(8)
28916 .kr(1)
28917 .sr(4)
28918 .m(3)
28919 .n(8)
28920 .k(k)
28921 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28922 }
28923 }
28924
28925 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
28926 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028927 for (uint32_t n = 1; n <= 8; n++) {
28928 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028929 GemmMicrokernelTester()
28930 .mr(3)
28931 .nr(8)
28932 .kr(1)
28933 .sr(4)
28934 .m(m)
28935 .n(n)
28936 .k(k)
28937 .iterations(1)
28938 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28939 }
28940 }
28941 }
28942 }
28943
28944 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4) {
28945 for (size_t k = 8; k <= 40; k += 4) {
28946 GemmMicrokernelTester()
28947 .mr(3)
28948 .nr(8)
28949 .kr(1)
28950 .sr(4)
28951 .m(3)
28952 .n(8)
28953 .k(k)
28954 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28955 }
28956 }
28957
28958 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, k_div_4_subtile) {
28959 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028960 for (uint32_t n = 1; n <= 8; n++) {
28961 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028962 GemmMicrokernelTester()
28963 .mr(3)
28964 .nr(8)
28965 .kr(1)
28966 .sr(4)
28967 .m(m)
28968 .n(n)
28969 .k(k)
28970 .iterations(1)
28971 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28972 }
28973 }
28974 }
28975 }
28976
28977 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8) {
28978 for (uint32_t n = 9; n < 16; n++) {
28979 for (size_t k = 1; k <= 20; k += 5) {
28980 GemmMicrokernelTester()
28981 .mr(3)
28982 .nr(8)
28983 .kr(1)
28984 .sr(4)
28985 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028986 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080028987 .k(k)
28988 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
28989 }
28990 }
28991 }
28992
28993 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
28994 for (uint32_t n = 9; n < 16; n++) {
28995 for (size_t k = 1; k <= 20; k += 5) {
28996 GemmMicrokernelTester()
28997 .mr(3)
28998 .nr(8)
28999 .kr(1)
29000 .sr(4)
29001 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029002 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029003 .k(k)
29004 .cn_stride(11)
29005 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29006 }
29007 }
29008 }
29009
29010 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
29011 for (uint32_t n = 9; n < 16; n++) {
29012 for (size_t k = 1; k <= 20; k += 5) {
29013 for (uint32_t m = 1; m <= 3; m++) {
29014 GemmMicrokernelTester()
29015 .mr(3)
29016 .nr(8)
29017 .kr(1)
29018 .sr(4)
29019 .m(m)
29020 .n(n)
29021 .k(k)
29022 .iterations(1)
29023 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29024 }
29025 }
29026 }
29027 }
29028
29029 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8) {
29030 for (uint32_t n = 16; n <= 24; n += 8) {
29031 for (size_t k = 1; k <= 20; k += 5) {
29032 GemmMicrokernelTester()
29033 .mr(3)
29034 .nr(8)
29035 .kr(1)
29036 .sr(4)
29037 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029038 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029039 .k(k)
29040 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29041 }
29042 }
29043 }
29044
29045 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
29046 for (uint32_t n = 16; n <= 24; n += 8) {
29047 for (size_t k = 1; k <= 20; k += 5) {
29048 GemmMicrokernelTester()
29049 .mr(3)
29050 .nr(8)
29051 .kr(1)
29052 .sr(4)
29053 .m(3)
29054 .n(n)
29055 .k(k)
29056 .cn_stride(11)
29057 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29058 }
29059 }
29060 }
29061
29062 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_subtile) {
29063 for (uint32_t n = 16; n <= 24; n += 8) {
29064 for (size_t k = 1; k <= 20; k += 5) {
29065 for (uint32_t m = 1; m <= 3; m++) {
29066 GemmMicrokernelTester()
29067 .mr(3)
29068 .nr(8)
29069 .kr(1)
29070 .sr(4)
29071 .m(m)
29072 .n(n)
29073 .k(k)
29074 .iterations(1)
29075 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29076 }
29077 }
29078 }
29079 }
29080
29081 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, small_kernel) {
29082 for (size_t k = 1; k <= 20; k += 5) {
29083 GemmMicrokernelTester()
29084 .mr(3)
29085 .nr(8)
29086 .kr(1)
29087 .sr(4)
29088 .m(3)
29089 .n(8)
29090 .k(k)
29091 .ks(3)
29092 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29093 }
29094 }
29095
29096 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, small_kernel_subtile) {
29097 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029098 for (uint32_t n = 1; n <= 8; n++) {
29099 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029100 GemmMicrokernelTester()
29101 .mr(3)
29102 .nr(8)
29103 .kr(1)
29104 .sr(4)
29105 .m(m)
29106 .n(n)
29107 .k(k)
29108 .ks(3)
29109 .iterations(1)
29110 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29111 }
29112 }
29113 }
29114 }
29115
29116 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_gt_8_small_kernel) {
29117 for (uint32_t n = 9; n < 16; n++) {
29118 for (size_t k = 1; k <= 20; k += 5) {
29119 GemmMicrokernelTester()
29120 .mr(3)
29121 .nr(8)
29122 .kr(1)
29123 .sr(4)
29124 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029125 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029126 .k(k)
29127 .ks(3)
29128 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29129 }
29130 }
29131 }
29132
29133 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, n_div_8_small_kernel) {
29134 for (uint32_t n = 16; n <= 24; n += 8) {
29135 for (size_t k = 1; k <= 20; k += 5) {
29136 GemmMicrokernelTester()
29137 .mr(3)
29138 .nr(8)
29139 .kr(1)
29140 .sr(4)
29141 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029142 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029143 .k(k)
29144 .ks(3)
29145 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29146 }
29147 }
29148 }
29149
29150 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, strided_cm_subtile) {
29151 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029152 for (uint32_t n = 1; n <= 8; n++) {
29153 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029154 GemmMicrokernelTester()
29155 .mr(3)
29156 .nr(8)
29157 .kr(1)
29158 .sr(4)
29159 .m(m)
29160 .n(n)
29161 .k(k)
29162 .cm_stride(11)
29163 .iterations(1)
29164 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29165 }
29166 }
29167 }
29168 }
29169
29170 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, a_offset) {
29171 for (size_t k = 1; k <= 20; k += 5) {
29172 GemmMicrokernelTester()
29173 .mr(3)
29174 .nr(8)
29175 .kr(1)
29176 .sr(4)
29177 .m(3)
29178 .n(8)
29179 .k(k)
29180 .ks(3)
29181 .a_offset(67)
29182 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29183 }
29184 }
29185
29186 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029187 for (size_t k = 1; k <= 20; k += 5) {
29188 for (uint32_t mz = 0; mz < 3; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029189 GemmMicrokernelTester()
29190 .mr(3)
29191 .nr(8)
29192 .kr(1)
29193 .sr(4)
29194 .m(3)
29195 .n(8)
29196 .k(k)
29197 .ks(3)
29198 .a_offset(67)
29199 .zero_index(mz)
29200 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29201 }
29202 }
29203 }
29204
29205 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, qmin) {
29206 GemmMicrokernelTester()
29207 .mr(3)
29208 .nr(8)
29209 .kr(1)
29210 .sr(4)
29211 .m(3)
29212 .n(8)
29213 .k(4)
29214 .qmin(128)
29215 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29216 }
29217
29218 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, qmax) {
29219 GemmMicrokernelTester()
29220 .mr(3)
29221 .nr(8)
29222 .kr(1)
29223 .sr(4)
29224 .m(3)
29225 .n(8)
29226 .k(4)
29227 .qmax(128)
29228 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29229 }
29230
29231 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_ARM, strided_cm) {
29232 GemmMicrokernelTester()
29233 .mr(3)
29234 .nr(8)
29235 .kr(1)
29236 .sr(4)
29237 .m(3)
29238 .n(8)
29239 .k(4)
29240 .cm_stride(11)
29241 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29242 }
29243#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29244
29245
29246#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29247 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4) {
29248 GemmMicrokernelTester()
29249 .mr(6)
29250 .nr(8)
29251 .kr(1)
29252 .sr(4)
29253 .m(6)
29254 .n(8)
29255 .k(4)
29256 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29257 }
29258
29259 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, strided_cn) {
29260 GemmMicrokernelTester()
29261 .mr(6)
29262 .nr(8)
29263 .kr(1)
29264 .sr(4)
29265 .m(6)
29266 .n(8)
29267 .k(4)
29268 .cn_stride(11)
29269 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29270 }
29271
29272 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029273 for (uint32_t n = 1; n <= 8; n++) {
29274 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029275 GemmMicrokernelTester()
29276 .mr(6)
29277 .nr(8)
29278 .kr(1)
29279 .sr(4)
29280 .m(m)
29281 .n(n)
29282 .k(4)
29283 .iterations(1)
29284 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29285 }
29286 }
29287 }
29288
29289 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile_m) {
29290 for (uint32_t m = 1; m <= 6; m++) {
29291 GemmMicrokernelTester()
29292 .mr(6)
29293 .nr(8)
29294 .kr(1)
29295 .sr(4)
29296 .m(m)
29297 .n(8)
29298 .k(4)
29299 .iterations(1)
29300 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29301 }
29302 }
29303
29304 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_eq_4_subtile_n) {
29305 for (uint32_t n = 1; n <= 8; n++) {
29306 GemmMicrokernelTester()
29307 .mr(6)
29308 .nr(8)
29309 .kr(1)
29310 .sr(4)
29311 .m(6)
29312 .n(n)
29313 .k(4)
29314 .iterations(1)
29315 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29316 }
29317 }
29318
29319 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4) {
29320 for (size_t k = 1; k < 4; k++) {
29321 GemmMicrokernelTester()
29322 .mr(6)
29323 .nr(8)
29324 .kr(1)
29325 .sr(4)
29326 .m(6)
29327 .n(8)
29328 .k(k)
29329 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29330 }
29331 }
29332
29333 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_lt_4_subtile) {
29334 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029335 for (uint32_t n = 1; n <= 8; n++) {
29336 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029337 GemmMicrokernelTester()
29338 .mr(6)
29339 .nr(8)
29340 .kr(1)
29341 .sr(4)
29342 .m(m)
29343 .n(n)
29344 .k(k)
29345 .iterations(1)
29346 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29347 }
29348 }
29349 }
29350 }
29351
29352 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4) {
29353 for (size_t k = 5; k < 8; k++) {
29354 GemmMicrokernelTester()
29355 .mr(6)
29356 .nr(8)
29357 .kr(1)
29358 .sr(4)
29359 .m(6)
29360 .n(8)
29361 .k(k)
29362 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29363 }
29364 }
29365
29366 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_gt_4_subtile) {
29367 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029368 for (uint32_t n = 1; n <= 8; n++) {
29369 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029370 GemmMicrokernelTester()
29371 .mr(6)
29372 .nr(8)
29373 .kr(1)
29374 .sr(4)
29375 .m(m)
29376 .n(n)
29377 .k(k)
29378 .iterations(1)
29379 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29380 }
29381 }
29382 }
29383 }
29384
29385 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4) {
29386 for (size_t k = 8; k <= 40; k += 4) {
29387 GemmMicrokernelTester()
29388 .mr(6)
29389 .nr(8)
29390 .kr(1)
29391 .sr(4)
29392 .m(6)
29393 .n(8)
29394 .k(k)
29395 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29396 }
29397 }
29398
29399 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, k_div_4_subtile) {
29400 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029401 for (uint32_t n = 1; n <= 8; n++) {
29402 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029403 GemmMicrokernelTester()
29404 .mr(6)
29405 .nr(8)
29406 .kr(1)
29407 .sr(4)
29408 .m(m)
29409 .n(n)
29410 .k(k)
29411 .iterations(1)
29412 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29413 }
29414 }
29415 }
29416 }
29417
29418 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8) {
29419 for (uint32_t n = 9; n < 16; n++) {
29420 for (size_t k = 1; k <= 20; k += 5) {
29421 GemmMicrokernelTester()
29422 .mr(6)
29423 .nr(8)
29424 .kr(1)
29425 .sr(4)
29426 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029427 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029428 .k(k)
29429 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29430 }
29431 }
29432 }
29433
29434 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_strided_cn) {
29435 for (uint32_t n = 9; n < 16; n++) {
29436 for (size_t k = 1; k <= 20; k += 5) {
29437 GemmMicrokernelTester()
29438 .mr(6)
29439 .nr(8)
29440 .kr(1)
29441 .sr(4)
29442 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029443 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029444 .k(k)
29445 .cn_stride(11)
29446 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29447 }
29448 }
29449 }
29450
29451 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_subtile) {
29452 for (uint32_t n = 9; n < 16; n++) {
29453 for (size_t k = 1; k <= 20; k += 5) {
29454 for (uint32_t m = 1; m <= 6; m++) {
29455 GemmMicrokernelTester()
29456 .mr(6)
29457 .nr(8)
29458 .kr(1)
29459 .sr(4)
29460 .m(m)
29461 .n(n)
29462 .k(k)
29463 .iterations(1)
29464 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29465 }
29466 }
29467 }
29468 }
29469
29470 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8) {
29471 for (uint32_t n = 16; n <= 24; n += 8) {
29472 for (size_t k = 1; k <= 20; k += 5) {
29473 GemmMicrokernelTester()
29474 .mr(6)
29475 .nr(8)
29476 .kr(1)
29477 .sr(4)
29478 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029479 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029480 .k(k)
29481 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29482 }
29483 }
29484 }
29485
29486 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_strided_cn) {
29487 for (uint32_t n = 16; n <= 24; n += 8) {
29488 for (size_t k = 1; k <= 20; k += 5) {
29489 GemmMicrokernelTester()
29490 .mr(6)
29491 .nr(8)
29492 .kr(1)
29493 .sr(4)
29494 .m(6)
29495 .n(n)
29496 .k(k)
29497 .cn_stride(11)
29498 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29499 }
29500 }
29501 }
29502
29503 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_subtile) {
29504 for (uint32_t n = 16; n <= 24; n += 8) {
29505 for (size_t k = 1; k <= 20; k += 5) {
29506 for (uint32_t m = 1; m <= 6; m++) {
29507 GemmMicrokernelTester()
29508 .mr(6)
29509 .nr(8)
29510 .kr(1)
29511 .sr(4)
29512 .m(m)
29513 .n(n)
29514 .k(k)
29515 .iterations(1)
29516 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29517 }
29518 }
29519 }
29520 }
29521
29522 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, small_kernel) {
29523 for (size_t k = 1; k <= 20; k += 5) {
29524 GemmMicrokernelTester()
29525 .mr(6)
29526 .nr(8)
29527 .kr(1)
29528 .sr(4)
29529 .m(6)
29530 .n(8)
29531 .k(k)
29532 .ks(3)
29533 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29534 }
29535 }
29536
29537 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, small_kernel_subtile) {
29538 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029539 for (uint32_t n = 1; n <= 8; n++) {
29540 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029541 GemmMicrokernelTester()
29542 .mr(6)
29543 .nr(8)
29544 .kr(1)
29545 .sr(4)
29546 .m(m)
29547 .n(n)
29548 .k(k)
29549 .ks(3)
29550 .iterations(1)
29551 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29552 }
29553 }
29554 }
29555 }
29556
29557 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_gt_8_small_kernel) {
29558 for (uint32_t n = 9; n < 16; n++) {
29559 for (size_t k = 1; k <= 20; k += 5) {
29560 GemmMicrokernelTester()
29561 .mr(6)
29562 .nr(8)
29563 .kr(1)
29564 .sr(4)
29565 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029566 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029567 .k(k)
29568 .ks(3)
29569 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29570 }
29571 }
29572 }
29573
29574 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, n_div_8_small_kernel) {
29575 for (uint32_t n = 16; n <= 24; n += 8) {
29576 for (size_t k = 1; k <= 20; k += 5) {
29577 GemmMicrokernelTester()
29578 .mr(6)
29579 .nr(8)
29580 .kr(1)
29581 .sr(4)
29582 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029583 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029584 .k(k)
29585 .ks(3)
29586 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29587 }
29588 }
29589 }
29590
29591 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, strided_cm_subtile) {
29592 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029593 for (uint32_t n = 1; n <= 8; n++) {
29594 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029595 GemmMicrokernelTester()
29596 .mr(6)
29597 .nr(8)
29598 .kr(1)
29599 .sr(4)
29600 .m(m)
29601 .n(n)
29602 .k(k)
29603 .cm_stride(11)
29604 .iterations(1)
29605 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29606 }
29607 }
29608 }
29609 }
29610
29611 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, a_offset) {
29612 for (size_t k = 1; k <= 20; k += 5) {
29613 GemmMicrokernelTester()
29614 .mr(6)
29615 .nr(8)
29616 .kr(1)
29617 .sr(4)
29618 .m(6)
29619 .n(8)
29620 .k(k)
29621 .ks(3)
29622 .a_offset(127)
29623 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29624 }
29625 }
29626
29627 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029628 for (size_t k = 1; k <= 20; k += 5) {
29629 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029630 GemmMicrokernelTester()
29631 .mr(6)
29632 .nr(8)
29633 .kr(1)
29634 .sr(4)
29635 .m(6)
29636 .n(8)
29637 .k(k)
29638 .ks(3)
29639 .a_offset(127)
29640 .zero_index(mz)
29641 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29642 }
29643 }
29644 }
29645
29646 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, qmin) {
29647 GemmMicrokernelTester()
29648 .mr(6)
29649 .nr(8)
29650 .kr(1)
29651 .sr(4)
29652 .m(6)
29653 .n(8)
29654 .k(4)
29655 .qmin(128)
29656 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29657 }
29658
29659 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, qmax) {
29660 GemmMicrokernelTester()
29661 .mr(6)
29662 .nr(8)
29663 .kr(1)
29664 .sr(4)
29665 .m(6)
29666 .n(8)
29667 .k(4)
29668 .qmax(128)
29669 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29670 }
29671
29672 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_ARM, strided_cm) {
29673 GemmMicrokernelTester()
29674 .mr(6)
29675 .nr(8)
29676 .kr(1)
29677 .sr(4)
29678 .m(6)
29679 .n(8)
29680 .k(4)
29681 .cm_stride(11)
29682 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
29683 }
29684#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29685
29686
29687#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29688 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4) {
29689 GemmMicrokernelTester()
29690 .mr(3)
29691 .nr(8)
29692 .kr(1)
29693 .sr(4)
29694 .m(3)
29695 .n(8)
29696 .k(4)
29697 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29698 }
29699
29700 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, strided_cn) {
29701 GemmMicrokernelTester()
29702 .mr(3)
29703 .nr(8)
29704 .kr(1)
29705 .sr(4)
29706 .m(3)
29707 .n(8)
29708 .k(4)
29709 .cn_stride(11)
29710 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29711 }
29712
29713 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029714 for (uint32_t n = 1; n <= 8; n++) {
29715 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029716 GemmMicrokernelTester()
29717 .mr(3)
29718 .nr(8)
29719 .kr(1)
29720 .sr(4)
29721 .m(m)
29722 .n(n)
29723 .k(4)
29724 .iterations(1)
29725 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29726 }
29727 }
29728 }
29729
29730 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
29731 for (uint32_t m = 1; m <= 3; m++) {
29732 GemmMicrokernelTester()
29733 .mr(3)
29734 .nr(8)
29735 .kr(1)
29736 .sr(4)
29737 .m(m)
29738 .n(8)
29739 .k(4)
29740 .iterations(1)
29741 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29742 }
29743 }
29744
29745 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
29746 for (uint32_t n = 1; n <= 8; n++) {
29747 GemmMicrokernelTester()
29748 .mr(3)
29749 .nr(8)
29750 .kr(1)
29751 .sr(4)
29752 .m(3)
29753 .n(n)
29754 .k(4)
29755 .iterations(1)
29756 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29757 }
29758 }
29759
29760 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4) {
29761 for (size_t k = 1; k < 4; k++) {
29762 GemmMicrokernelTester()
29763 .mr(3)
29764 .nr(8)
29765 .kr(1)
29766 .sr(4)
29767 .m(3)
29768 .n(8)
29769 .k(k)
29770 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29771 }
29772 }
29773
29774 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_lt_4_subtile) {
29775 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029776 for (uint32_t n = 1; n <= 8; n++) {
29777 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029778 GemmMicrokernelTester()
29779 .mr(3)
29780 .nr(8)
29781 .kr(1)
29782 .sr(4)
29783 .m(m)
29784 .n(n)
29785 .k(k)
29786 .iterations(1)
29787 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29788 }
29789 }
29790 }
29791 }
29792
29793 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4) {
29794 for (size_t k = 5; k < 8; k++) {
29795 GemmMicrokernelTester()
29796 .mr(3)
29797 .nr(8)
29798 .kr(1)
29799 .sr(4)
29800 .m(3)
29801 .n(8)
29802 .k(k)
29803 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29804 }
29805 }
29806
29807 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_gt_4_subtile) {
29808 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029809 for (uint32_t n = 1; n <= 8; n++) {
29810 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029811 GemmMicrokernelTester()
29812 .mr(3)
29813 .nr(8)
29814 .kr(1)
29815 .sr(4)
29816 .m(m)
29817 .n(n)
29818 .k(k)
29819 .iterations(1)
29820 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29821 }
29822 }
29823 }
29824 }
29825
29826 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_div_4) {
29827 for (size_t k = 8; k <= 40; k += 4) {
29828 GemmMicrokernelTester()
29829 .mr(3)
29830 .nr(8)
29831 .kr(1)
29832 .sr(4)
29833 .m(3)
29834 .n(8)
29835 .k(k)
29836 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29837 }
29838 }
29839
29840 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, k_div_4_subtile) {
29841 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029842 for (uint32_t n = 1; n <= 8; n++) {
29843 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029844 GemmMicrokernelTester()
29845 .mr(3)
29846 .nr(8)
29847 .kr(1)
29848 .sr(4)
29849 .m(m)
29850 .n(n)
29851 .k(k)
29852 .iterations(1)
29853 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29854 }
29855 }
29856 }
29857 }
29858
29859 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8) {
29860 for (uint32_t n = 9; n < 16; n++) {
29861 for (size_t k = 1; k <= 20; k += 5) {
29862 GemmMicrokernelTester()
29863 .mr(3)
29864 .nr(8)
29865 .kr(1)
29866 .sr(4)
29867 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029868 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029869 .k(k)
29870 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29871 }
29872 }
29873 }
29874
29875 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
29876 for (uint32_t n = 9; n < 16; n++) {
29877 for (size_t k = 1; k <= 20; k += 5) {
29878 GemmMicrokernelTester()
29879 .mr(3)
29880 .nr(8)
29881 .kr(1)
29882 .sr(4)
29883 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029884 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029885 .k(k)
29886 .cn_stride(11)
29887 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29888 }
29889 }
29890 }
29891
29892 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_subtile) {
29893 for (uint32_t n = 9; n < 16; n++) {
29894 for (size_t k = 1; k <= 20; k += 5) {
29895 for (uint32_t m = 1; m <= 3; m++) {
29896 GemmMicrokernelTester()
29897 .mr(3)
29898 .nr(8)
29899 .kr(1)
29900 .sr(4)
29901 .m(m)
29902 .n(n)
29903 .k(k)
29904 .iterations(1)
29905 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29906 }
29907 }
29908 }
29909 }
29910
29911 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8) {
29912 for (uint32_t n = 16; n <= 24; n += 8) {
29913 for (size_t k = 1; k <= 20; k += 5) {
29914 GemmMicrokernelTester()
29915 .mr(3)
29916 .nr(8)
29917 .kr(1)
29918 .sr(4)
29919 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029920 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029921 .k(k)
29922 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29923 }
29924 }
29925 }
29926
29927 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
29928 for (uint32_t n = 16; n <= 24; n += 8) {
29929 for (size_t k = 1; k <= 20; k += 5) {
29930 GemmMicrokernelTester()
29931 .mr(3)
29932 .nr(8)
29933 .kr(1)
29934 .sr(4)
29935 .m(3)
29936 .n(n)
29937 .k(k)
29938 .cn_stride(11)
29939 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29940 }
29941 }
29942 }
29943
29944 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_subtile) {
29945 for (uint32_t n = 16; n <= 24; n += 8) {
29946 for (size_t k = 1; k <= 20; k += 5) {
29947 for (uint32_t m = 1; m <= 3; m++) {
29948 GemmMicrokernelTester()
29949 .mr(3)
29950 .nr(8)
29951 .kr(1)
29952 .sr(4)
29953 .m(m)
29954 .n(n)
29955 .k(k)
29956 .iterations(1)
29957 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29958 }
29959 }
29960 }
29961 }
29962
29963 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, small_kernel) {
29964 for (size_t k = 1; k <= 20; k += 5) {
29965 GemmMicrokernelTester()
29966 .mr(3)
29967 .nr(8)
29968 .kr(1)
29969 .sr(4)
29970 .m(3)
29971 .n(8)
29972 .k(k)
29973 .ks(3)
29974 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29975 }
29976 }
29977
29978 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, small_kernel_subtile) {
29979 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029980 for (uint32_t n = 1; n <= 8; n++) {
29981 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080029982 GemmMicrokernelTester()
29983 .mr(3)
29984 .nr(8)
29985 .kr(1)
29986 .sr(4)
29987 .m(m)
29988 .n(n)
29989 .k(k)
29990 .ks(3)
29991 .iterations(1)
29992 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
29993 }
29994 }
29995 }
29996 }
29997
29998 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_gt_8_small_kernel) {
29999 for (uint32_t n = 9; n < 16; n++) {
30000 for (size_t k = 1; k <= 20; k += 5) {
30001 GemmMicrokernelTester()
30002 .mr(3)
30003 .nr(8)
30004 .kr(1)
30005 .sr(4)
30006 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030007 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030008 .k(k)
30009 .ks(3)
30010 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30011 }
30012 }
30013 }
30014
30015 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, n_div_8_small_kernel) {
30016 for (uint32_t n = 16; n <= 24; n += 8) {
30017 for (size_t k = 1; k <= 20; k += 5) {
30018 GemmMicrokernelTester()
30019 .mr(3)
30020 .nr(8)
30021 .kr(1)
30022 .sr(4)
30023 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030024 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030025 .k(k)
30026 .ks(3)
30027 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30028 }
30029 }
30030 }
30031
30032 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, strided_cm_subtile) {
30033 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030034 for (uint32_t n = 1; n <= 8; n++) {
30035 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030036 GemmMicrokernelTester()
30037 .mr(3)
30038 .nr(8)
30039 .kr(1)
30040 .sr(4)
30041 .m(m)
30042 .n(n)
30043 .k(k)
30044 .cm_stride(11)
30045 .iterations(1)
30046 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30047 }
30048 }
30049 }
30050 }
30051
30052 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, a_offset) {
30053 for (size_t k = 1; k <= 20; k += 5) {
30054 GemmMicrokernelTester()
30055 .mr(3)
30056 .nr(8)
30057 .kr(1)
30058 .sr(4)
30059 .m(3)
30060 .n(8)
30061 .k(k)
30062 .ks(3)
30063 .a_offset(67)
30064 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30065 }
30066 }
30067
30068 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030069 for (size_t k = 1; k <= 20; k += 5) {
30070 for (uint32_t mz = 0; mz < 3; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030071 GemmMicrokernelTester()
30072 .mr(3)
30073 .nr(8)
30074 .kr(1)
30075 .sr(4)
30076 .m(3)
30077 .n(8)
30078 .k(k)
30079 .ks(3)
30080 .a_offset(67)
30081 .zero_index(mz)
30082 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30083 }
30084 }
30085 }
30086
30087 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, qmin) {
30088 GemmMicrokernelTester()
30089 .mr(3)
30090 .nr(8)
30091 .kr(1)
30092 .sr(4)
30093 .m(3)
30094 .n(8)
30095 .k(4)
30096 .qmin(128)
30097 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30098 }
30099
30100 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, qmax) {
30101 GemmMicrokernelTester()
30102 .mr(3)
30103 .nr(8)
30104 .kr(1)
30105 .sr(4)
30106 .m(3)
30107 .n(8)
30108 .k(4)
30109 .qmax(128)
30110 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30111 }
30112
30113 TEST(F32_IGEMM_MINMAX_3X8S4__WASMSIMD_X86, strided_cm) {
30114 GemmMicrokernelTester()
30115 .mr(3)
30116 .nr(8)
30117 .kr(1)
30118 .sr(4)
30119 .m(3)
30120 .n(8)
30121 .k(4)
30122 .cm_stride(11)
30123 .Test(xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30124 }
30125#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30126
30127
30128#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30129 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4) {
30130 GemmMicrokernelTester()
30131 .mr(6)
30132 .nr(8)
30133 .kr(1)
30134 .sr(4)
30135 .m(6)
30136 .n(8)
30137 .k(4)
30138 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30139 }
30140
30141 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, strided_cn) {
30142 GemmMicrokernelTester()
30143 .mr(6)
30144 .nr(8)
30145 .kr(1)
30146 .sr(4)
30147 .m(6)
30148 .n(8)
30149 .k(4)
30150 .cn_stride(11)
30151 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30152 }
30153
30154 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030155 for (uint32_t n = 1; n <= 8; n++) {
30156 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030157 GemmMicrokernelTester()
30158 .mr(6)
30159 .nr(8)
30160 .kr(1)
30161 .sr(4)
30162 .m(m)
30163 .n(n)
30164 .k(4)
30165 .iterations(1)
30166 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30167 }
30168 }
30169 }
30170
30171 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile_m) {
30172 for (uint32_t m = 1; m <= 6; m++) {
30173 GemmMicrokernelTester()
30174 .mr(6)
30175 .nr(8)
30176 .kr(1)
30177 .sr(4)
30178 .m(m)
30179 .n(8)
30180 .k(4)
30181 .iterations(1)
30182 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30183 }
30184 }
30185
30186 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_eq_4_subtile_n) {
30187 for (uint32_t n = 1; n <= 8; n++) {
30188 GemmMicrokernelTester()
30189 .mr(6)
30190 .nr(8)
30191 .kr(1)
30192 .sr(4)
30193 .m(6)
30194 .n(n)
30195 .k(4)
30196 .iterations(1)
30197 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30198 }
30199 }
30200
30201 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4) {
30202 for (size_t k = 1; k < 4; k++) {
30203 GemmMicrokernelTester()
30204 .mr(6)
30205 .nr(8)
30206 .kr(1)
30207 .sr(4)
30208 .m(6)
30209 .n(8)
30210 .k(k)
30211 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30212 }
30213 }
30214
30215 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_lt_4_subtile) {
30216 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030217 for (uint32_t n = 1; n <= 8; n++) {
30218 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030219 GemmMicrokernelTester()
30220 .mr(6)
30221 .nr(8)
30222 .kr(1)
30223 .sr(4)
30224 .m(m)
30225 .n(n)
30226 .k(k)
30227 .iterations(1)
30228 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30229 }
30230 }
30231 }
30232 }
30233
30234 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4) {
30235 for (size_t k = 5; k < 8; k++) {
30236 GemmMicrokernelTester()
30237 .mr(6)
30238 .nr(8)
30239 .kr(1)
30240 .sr(4)
30241 .m(6)
30242 .n(8)
30243 .k(k)
30244 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30245 }
30246 }
30247
30248 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_gt_4_subtile) {
30249 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030250 for (uint32_t n = 1; n <= 8; n++) {
30251 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030252 GemmMicrokernelTester()
30253 .mr(6)
30254 .nr(8)
30255 .kr(1)
30256 .sr(4)
30257 .m(m)
30258 .n(n)
30259 .k(k)
30260 .iterations(1)
30261 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30262 }
30263 }
30264 }
30265 }
30266
30267 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_div_4) {
30268 for (size_t k = 8; k <= 40; k += 4) {
30269 GemmMicrokernelTester()
30270 .mr(6)
30271 .nr(8)
30272 .kr(1)
30273 .sr(4)
30274 .m(6)
30275 .n(8)
30276 .k(k)
30277 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30278 }
30279 }
30280
30281 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, k_div_4_subtile) {
30282 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030283 for (uint32_t n = 1; n <= 8; n++) {
30284 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030285 GemmMicrokernelTester()
30286 .mr(6)
30287 .nr(8)
30288 .kr(1)
30289 .sr(4)
30290 .m(m)
30291 .n(n)
30292 .k(k)
30293 .iterations(1)
30294 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30295 }
30296 }
30297 }
30298 }
30299
30300 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8) {
30301 for (uint32_t n = 9; n < 16; n++) {
30302 for (size_t k = 1; k <= 20; k += 5) {
30303 GemmMicrokernelTester()
30304 .mr(6)
30305 .nr(8)
30306 .kr(1)
30307 .sr(4)
30308 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030309 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030310 .k(k)
30311 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30312 }
30313 }
30314 }
30315
30316 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_strided_cn) {
30317 for (uint32_t n = 9; n < 16; n++) {
30318 for (size_t k = 1; k <= 20; k += 5) {
30319 GemmMicrokernelTester()
30320 .mr(6)
30321 .nr(8)
30322 .kr(1)
30323 .sr(4)
30324 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030325 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030326 .k(k)
30327 .cn_stride(11)
30328 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30329 }
30330 }
30331 }
30332
30333 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_subtile) {
30334 for (uint32_t n = 9; n < 16; n++) {
30335 for (size_t k = 1; k <= 20; k += 5) {
30336 for (uint32_t m = 1; m <= 6; m++) {
30337 GemmMicrokernelTester()
30338 .mr(6)
30339 .nr(8)
30340 .kr(1)
30341 .sr(4)
30342 .m(m)
30343 .n(n)
30344 .k(k)
30345 .iterations(1)
30346 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30347 }
30348 }
30349 }
30350 }
30351
30352 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8) {
30353 for (uint32_t n = 16; n <= 24; n += 8) {
30354 for (size_t k = 1; k <= 20; k += 5) {
30355 GemmMicrokernelTester()
30356 .mr(6)
30357 .nr(8)
30358 .kr(1)
30359 .sr(4)
30360 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030361 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030362 .k(k)
30363 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30364 }
30365 }
30366 }
30367
30368 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_strided_cn) {
30369 for (uint32_t n = 16; n <= 24; n += 8) {
30370 for (size_t k = 1; k <= 20; k += 5) {
30371 GemmMicrokernelTester()
30372 .mr(6)
30373 .nr(8)
30374 .kr(1)
30375 .sr(4)
30376 .m(6)
30377 .n(n)
30378 .k(k)
30379 .cn_stride(11)
30380 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30381 }
30382 }
30383 }
30384
30385 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_subtile) {
30386 for (uint32_t n = 16; n <= 24; n += 8) {
30387 for (size_t k = 1; k <= 20; k += 5) {
30388 for (uint32_t m = 1; m <= 6; m++) {
30389 GemmMicrokernelTester()
30390 .mr(6)
30391 .nr(8)
30392 .kr(1)
30393 .sr(4)
30394 .m(m)
30395 .n(n)
30396 .k(k)
30397 .iterations(1)
30398 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30399 }
30400 }
30401 }
30402 }
30403
30404 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, small_kernel) {
30405 for (size_t k = 1; k <= 20; k += 5) {
30406 GemmMicrokernelTester()
30407 .mr(6)
30408 .nr(8)
30409 .kr(1)
30410 .sr(4)
30411 .m(6)
30412 .n(8)
30413 .k(k)
30414 .ks(3)
30415 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30416 }
30417 }
30418
30419 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, small_kernel_subtile) {
30420 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030421 for (uint32_t n = 1; n <= 8; n++) {
30422 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030423 GemmMicrokernelTester()
30424 .mr(6)
30425 .nr(8)
30426 .kr(1)
30427 .sr(4)
30428 .m(m)
30429 .n(n)
30430 .k(k)
30431 .ks(3)
30432 .iterations(1)
30433 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30434 }
30435 }
30436 }
30437 }
30438
30439 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_gt_8_small_kernel) {
30440 for (uint32_t n = 9; n < 16; n++) {
30441 for (size_t k = 1; k <= 20; k += 5) {
30442 GemmMicrokernelTester()
30443 .mr(6)
30444 .nr(8)
30445 .kr(1)
30446 .sr(4)
30447 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030448 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030449 .k(k)
30450 .ks(3)
30451 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30452 }
30453 }
30454 }
30455
30456 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, n_div_8_small_kernel) {
30457 for (uint32_t n = 16; n <= 24; n += 8) {
30458 for (size_t k = 1; k <= 20; k += 5) {
30459 GemmMicrokernelTester()
30460 .mr(6)
30461 .nr(8)
30462 .kr(1)
30463 .sr(4)
30464 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030465 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030466 .k(k)
30467 .ks(3)
30468 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30469 }
30470 }
30471 }
30472
30473 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, strided_cm_subtile) {
30474 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030475 for (uint32_t n = 1; n <= 8; n++) {
30476 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030477 GemmMicrokernelTester()
30478 .mr(6)
30479 .nr(8)
30480 .kr(1)
30481 .sr(4)
30482 .m(m)
30483 .n(n)
30484 .k(k)
30485 .cm_stride(11)
30486 .iterations(1)
30487 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30488 }
30489 }
30490 }
30491 }
30492
30493 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, a_offset) {
30494 for (size_t k = 1; k <= 20; k += 5) {
30495 GemmMicrokernelTester()
30496 .mr(6)
30497 .nr(8)
30498 .kr(1)
30499 .sr(4)
30500 .m(6)
30501 .n(8)
30502 .k(k)
30503 .ks(3)
30504 .a_offset(127)
30505 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30506 }
30507 }
30508
30509 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030510 for (size_t k = 1; k <= 20; k += 5) {
30511 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030512 GemmMicrokernelTester()
30513 .mr(6)
30514 .nr(8)
30515 .kr(1)
30516 .sr(4)
30517 .m(6)
30518 .n(8)
30519 .k(k)
30520 .ks(3)
30521 .a_offset(127)
30522 .zero_index(mz)
30523 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30524 }
30525 }
30526 }
30527
30528 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, qmin) {
30529 GemmMicrokernelTester()
30530 .mr(6)
30531 .nr(8)
30532 .kr(1)
30533 .sr(4)
30534 .m(6)
30535 .n(8)
30536 .k(4)
30537 .qmin(128)
30538 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30539 }
30540
30541 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, qmax) {
30542 GemmMicrokernelTester()
30543 .mr(6)
30544 .nr(8)
30545 .kr(1)
30546 .sr(4)
30547 .m(6)
30548 .n(8)
30549 .k(4)
30550 .qmax(128)
30551 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30552 }
30553
30554 TEST(F32_IGEMM_MINMAX_6X8S4__WASMSIMD_X86, strided_cm) {
30555 GemmMicrokernelTester()
30556 .mr(6)
30557 .nr(8)
30558 .kr(1)
30559 .sr(4)
30560 .m(6)
30561 .n(8)
30562 .k(4)
30563 .cm_stride(11)
30564 .Test(xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
30565 }
30566#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30567
30568
30569#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30570 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4) {
30571 GemmMicrokernelTester()
30572 .mr(4)
30573 .nr(2)
30574 .kr(4)
30575 .sr(1)
30576 .m(4)
30577 .n(2)
30578 .k(4)
30579 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30580 }
30581
30582 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, strided_cn) {
30583 GemmMicrokernelTester()
30584 .mr(4)
30585 .nr(2)
30586 .kr(4)
30587 .sr(1)
30588 .m(4)
30589 .n(2)
30590 .k(4)
30591 .cn_stride(5)
30592 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30593 }
30594
30595 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030596 for (uint32_t n = 1; n <= 2; n++) {
30597 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030598 GemmMicrokernelTester()
30599 .mr(4)
30600 .nr(2)
30601 .kr(4)
30602 .sr(1)
30603 .m(m)
30604 .n(n)
30605 .k(4)
30606 .iterations(1)
30607 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30608 }
30609 }
30610 }
30611
30612 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4_subtile_m) {
30613 for (uint32_t m = 1; m <= 4; m++) {
30614 GemmMicrokernelTester()
30615 .mr(4)
30616 .nr(2)
30617 .kr(4)
30618 .sr(1)
30619 .m(m)
30620 .n(2)
30621 .k(4)
30622 .iterations(1)
30623 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30624 }
30625 }
30626
30627 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_eq_4_subtile_n) {
30628 for (uint32_t n = 1; n <= 2; n++) {
30629 GemmMicrokernelTester()
30630 .mr(4)
30631 .nr(2)
30632 .kr(4)
30633 .sr(1)
30634 .m(4)
30635 .n(n)
30636 .k(4)
30637 .iterations(1)
30638 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30639 }
30640 }
30641
30642 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_lt_4) {
30643 for (size_t k = 1; k < 4; k++) {
30644 GemmMicrokernelTester()
30645 .mr(4)
30646 .nr(2)
30647 .kr(4)
30648 .sr(1)
30649 .m(4)
30650 .n(2)
30651 .k(k)
30652 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30653 }
30654 }
30655
30656 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_lt_4_subtile) {
30657 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030658 for (uint32_t n = 1; n <= 2; n++) {
30659 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030660 GemmMicrokernelTester()
30661 .mr(4)
30662 .nr(2)
30663 .kr(4)
30664 .sr(1)
30665 .m(m)
30666 .n(n)
30667 .k(k)
30668 .iterations(1)
30669 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30670 }
30671 }
30672 }
30673 }
30674
30675 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_gt_4) {
30676 for (size_t k = 5; k < 8; k++) {
30677 GemmMicrokernelTester()
30678 .mr(4)
30679 .nr(2)
30680 .kr(4)
30681 .sr(1)
30682 .m(4)
30683 .n(2)
30684 .k(k)
30685 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30686 }
30687 }
30688
30689 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_gt_4_subtile) {
30690 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030691 for (uint32_t n = 1; n <= 2; n++) {
30692 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030693 GemmMicrokernelTester()
30694 .mr(4)
30695 .nr(2)
30696 .kr(4)
30697 .sr(1)
30698 .m(m)
30699 .n(n)
30700 .k(k)
30701 .iterations(1)
30702 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30703 }
30704 }
30705 }
30706 }
30707
30708 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_div_4) {
30709 for (size_t k = 8; k <= 40; k += 4) {
30710 GemmMicrokernelTester()
30711 .mr(4)
30712 .nr(2)
30713 .kr(4)
30714 .sr(1)
30715 .m(4)
30716 .n(2)
30717 .k(k)
30718 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30719 }
30720 }
30721
30722 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, k_div_4_subtile) {
30723 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030724 for (uint32_t n = 1; n <= 2; n++) {
30725 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030726 GemmMicrokernelTester()
30727 .mr(4)
30728 .nr(2)
30729 .kr(4)
30730 .sr(1)
30731 .m(m)
30732 .n(n)
30733 .k(k)
30734 .iterations(1)
30735 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30736 }
30737 }
30738 }
30739 }
30740
30741 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2) {
30742 for (uint32_t n = 3; n < 4; n++) {
30743 for (size_t k = 1; k <= 20; k += 5) {
30744 GemmMicrokernelTester()
30745 .mr(4)
30746 .nr(2)
30747 .kr(4)
30748 .sr(1)
30749 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030750 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030751 .k(k)
30752 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30753 }
30754 }
30755 }
30756
30757 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2_strided_cn) {
30758 for (uint32_t n = 3; n < 4; n++) {
30759 for (size_t k = 1; k <= 20; k += 5) {
30760 GemmMicrokernelTester()
30761 .mr(4)
30762 .nr(2)
30763 .kr(4)
30764 .sr(1)
30765 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030766 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030767 .k(k)
30768 .cn_stride(5)
30769 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30770 }
30771 }
30772 }
30773
30774 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2_subtile) {
30775 for (uint32_t n = 3; n < 4; n++) {
30776 for (size_t k = 1; k <= 20; k += 5) {
30777 for (uint32_t m = 1; m <= 4; m++) {
30778 GemmMicrokernelTester()
30779 .mr(4)
30780 .nr(2)
30781 .kr(4)
30782 .sr(1)
30783 .m(m)
30784 .n(n)
30785 .k(k)
30786 .iterations(1)
30787 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30788 }
30789 }
30790 }
30791 }
30792
30793 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2) {
30794 for (uint32_t n = 4; n <= 6; n += 2) {
30795 for (size_t k = 1; k <= 20; k += 5) {
30796 GemmMicrokernelTester()
30797 .mr(4)
30798 .nr(2)
30799 .kr(4)
30800 .sr(1)
30801 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030802 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030803 .k(k)
30804 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30805 }
30806 }
30807 }
30808
30809 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2_strided_cn) {
30810 for (uint32_t n = 4; n <= 6; n += 2) {
30811 for (size_t k = 1; k <= 20; k += 5) {
30812 GemmMicrokernelTester()
30813 .mr(4)
30814 .nr(2)
30815 .kr(4)
30816 .sr(1)
30817 .m(4)
30818 .n(n)
30819 .k(k)
30820 .cn_stride(5)
30821 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30822 }
30823 }
30824 }
30825
30826 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2_subtile) {
30827 for (uint32_t n = 4; n <= 6; n += 2) {
30828 for (size_t k = 1; k <= 20; k += 5) {
30829 for (uint32_t m = 1; m <= 4; m++) {
30830 GemmMicrokernelTester()
30831 .mr(4)
30832 .nr(2)
30833 .kr(4)
30834 .sr(1)
30835 .m(m)
30836 .n(n)
30837 .k(k)
30838 .iterations(1)
30839 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30840 }
30841 }
30842 }
30843 }
30844
30845 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, small_kernel) {
30846 for (size_t k = 1; k <= 20; k += 5) {
30847 GemmMicrokernelTester()
30848 .mr(4)
30849 .nr(2)
30850 .kr(4)
30851 .sr(1)
30852 .m(4)
30853 .n(2)
30854 .k(k)
30855 .ks(3)
30856 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30857 }
30858 }
30859
30860 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, small_kernel_subtile) {
30861 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030862 for (uint32_t n = 1; n <= 2; n++) {
30863 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030864 GemmMicrokernelTester()
30865 .mr(4)
30866 .nr(2)
30867 .kr(4)
30868 .sr(1)
30869 .m(m)
30870 .n(n)
30871 .k(k)
30872 .ks(3)
30873 .iterations(1)
30874 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30875 }
30876 }
30877 }
30878 }
30879
30880 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_gt_2_small_kernel) {
30881 for (uint32_t n = 3; n < 4; n++) {
30882 for (size_t k = 1; k <= 20; k += 5) {
30883 GemmMicrokernelTester()
30884 .mr(4)
30885 .nr(2)
30886 .kr(4)
30887 .sr(1)
30888 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030889 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030890 .k(k)
30891 .ks(3)
30892 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30893 }
30894 }
30895 }
30896
30897 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, n_div_2_small_kernel) {
30898 for (uint32_t n = 4; n <= 6; n += 2) {
30899 for (size_t k = 1; k <= 20; k += 5) {
30900 GemmMicrokernelTester()
30901 .mr(4)
30902 .nr(2)
30903 .kr(4)
30904 .sr(1)
30905 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030906 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030907 .k(k)
30908 .ks(3)
30909 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30910 }
30911 }
30912 }
30913
30914 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, strided_cm_subtile) {
30915 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030916 for (uint32_t n = 1; n <= 2; n++) {
30917 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030918 GemmMicrokernelTester()
30919 .mr(4)
30920 .nr(2)
30921 .kr(4)
30922 .sr(1)
30923 .m(m)
30924 .n(n)
30925 .k(k)
30926 .cm_stride(5)
30927 .iterations(1)
30928 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30929 }
30930 }
30931 }
30932 }
30933
30934 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, a_offset) {
30935 for (size_t k = 1; k <= 20; k += 5) {
30936 GemmMicrokernelTester()
30937 .mr(4)
30938 .nr(2)
30939 .kr(4)
30940 .sr(1)
30941 .m(4)
30942 .n(2)
30943 .k(k)
30944 .ks(3)
30945 .a_offset(83)
30946 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30947 }
30948 }
30949
30950 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030951 for (size_t k = 1; k <= 20; k += 5) {
30952 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080030953 GemmMicrokernelTester()
30954 .mr(4)
30955 .nr(2)
30956 .kr(4)
30957 .sr(1)
30958 .m(4)
30959 .n(2)
30960 .k(k)
30961 .ks(3)
30962 .a_offset(83)
30963 .zero_index(mz)
30964 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30965 }
30966 }
30967 }
30968
30969 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, qmin) {
30970 GemmMicrokernelTester()
30971 .mr(4)
30972 .nr(2)
30973 .kr(4)
30974 .sr(1)
30975 .m(4)
30976 .n(2)
30977 .k(4)
30978 .qmin(128)
30979 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30980 }
30981
30982 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, qmax) {
30983 GemmMicrokernelTester()
30984 .mr(4)
30985 .nr(2)
30986 .kr(4)
30987 .sr(1)
30988 .m(4)
30989 .n(2)
30990 .k(4)
30991 .qmax(128)
30992 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
30993 }
30994
30995 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_ARM, strided_cm) {
30996 GemmMicrokernelTester()
30997 .mr(4)
30998 .nr(2)
30999 .kr(4)
31000 .sr(1)
31001 .m(4)
31002 .n(2)
31003 .k(4)
31004 .cm_stride(5)
31005 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm, xnn_init_f32_minmax_wasmsimd_params);
31006 }
31007#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31008
31009
31010#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31011 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4) {
31012 GemmMicrokernelTester()
31013 .mr(4)
31014 .nr(2)
31015 .kr(4)
31016 .sr(1)
31017 .m(4)
31018 .n(2)
31019 .k(4)
31020 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31021 }
31022
31023 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, strided_cn) {
31024 GemmMicrokernelTester()
31025 .mr(4)
31026 .nr(2)
31027 .kr(4)
31028 .sr(1)
31029 .m(4)
31030 .n(2)
31031 .k(4)
31032 .cn_stride(5)
31033 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31034 }
31035
31036 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031037 for (uint32_t n = 1; n <= 2; n++) {
31038 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031039 GemmMicrokernelTester()
31040 .mr(4)
31041 .nr(2)
31042 .kr(4)
31043 .sr(1)
31044 .m(m)
31045 .n(n)
31046 .k(4)
31047 .iterations(1)
31048 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31049 }
31050 }
31051 }
31052
31053 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4_subtile_m) {
31054 for (uint32_t m = 1; m <= 4; m++) {
31055 GemmMicrokernelTester()
31056 .mr(4)
31057 .nr(2)
31058 .kr(4)
31059 .sr(1)
31060 .m(m)
31061 .n(2)
31062 .k(4)
31063 .iterations(1)
31064 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31065 }
31066 }
31067
31068 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_eq_4_subtile_n) {
31069 for (uint32_t n = 1; n <= 2; n++) {
31070 GemmMicrokernelTester()
31071 .mr(4)
31072 .nr(2)
31073 .kr(4)
31074 .sr(1)
31075 .m(4)
31076 .n(n)
31077 .k(4)
31078 .iterations(1)
31079 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31080 }
31081 }
31082
31083 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_lt_4) {
31084 for (size_t k = 1; k < 4; k++) {
31085 GemmMicrokernelTester()
31086 .mr(4)
31087 .nr(2)
31088 .kr(4)
31089 .sr(1)
31090 .m(4)
31091 .n(2)
31092 .k(k)
31093 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31094 }
31095 }
31096
31097 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_lt_4_subtile) {
31098 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031099 for (uint32_t n = 1; n <= 2; n++) {
31100 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031101 GemmMicrokernelTester()
31102 .mr(4)
31103 .nr(2)
31104 .kr(4)
31105 .sr(1)
31106 .m(m)
31107 .n(n)
31108 .k(k)
31109 .iterations(1)
31110 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31111 }
31112 }
31113 }
31114 }
31115
31116 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_gt_4) {
31117 for (size_t k = 5; k < 8; k++) {
31118 GemmMicrokernelTester()
31119 .mr(4)
31120 .nr(2)
31121 .kr(4)
31122 .sr(1)
31123 .m(4)
31124 .n(2)
31125 .k(k)
31126 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31127 }
31128 }
31129
31130 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_gt_4_subtile) {
31131 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031132 for (uint32_t n = 1; n <= 2; n++) {
31133 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031134 GemmMicrokernelTester()
31135 .mr(4)
31136 .nr(2)
31137 .kr(4)
31138 .sr(1)
31139 .m(m)
31140 .n(n)
31141 .k(k)
31142 .iterations(1)
31143 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31144 }
31145 }
31146 }
31147 }
31148
31149 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_div_4) {
31150 for (size_t k = 8; k <= 40; k += 4) {
31151 GemmMicrokernelTester()
31152 .mr(4)
31153 .nr(2)
31154 .kr(4)
31155 .sr(1)
31156 .m(4)
31157 .n(2)
31158 .k(k)
31159 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31160 }
31161 }
31162
31163 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, k_div_4_subtile) {
31164 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031165 for (uint32_t n = 1; n <= 2; n++) {
31166 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031167 GemmMicrokernelTester()
31168 .mr(4)
31169 .nr(2)
31170 .kr(4)
31171 .sr(1)
31172 .m(m)
31173 .n(n)
31174 .k(k)
31175 .iterations(1)
31176 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31177 }
31178 }
31179 }
31180 }
31181
31182 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2) {
31183 for (uint32_t n = 3; n < 4; n++) {
31184 for (size_t k = 1; k <= 20; k += 5) {
31185 GemmMicrokernelTester()
31186 .mr(4)
31187 .nr(2)
31188 .kr(4)
31189 .sr(1)
31190 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031191 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031192 .k(k)
31193 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31194 }
31195 }
31196 }
31197
31198 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2_strided_cn) {
31199 for (uint32_t n = 3; n < 4; n++) {
31200 for (size_t k = 1; k <= 20; k += 5) {
31201 GemmMicrokernelTester()
31202 .mr(4)
31203 .nr(2)
31204 .kr(4)
31205 .sr(1)
31206 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031207 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031208 .k(k)
31209 .cn_stride(5)
31210 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31211 }
31212 }
31213 }
31214
31215 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2_subtile) {
31216 for (uint32_t n = 3; n < 4; n++) {
31217 for (size_t k = 1; k <= 20; k += 5) {
31218 for (uint32_t m = 1; m <= 4; m++) {
31219 GemmMicrokernelTester()
31220 .mr(4)
31221 .nr(2)
31222 .kr(4)
31223 .sr(1)
31224 .m(m)
31225 .n(n)
31226 .k(k)
31227 .iterations(1)
31228 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31229 }
31230 }
31231 }
31232 }
31233
31234 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2) {
31235 for (uint32_t n = 4; n <= 6; n += 2) {
31236 for (size_t k = 1; k <= 20; k += 5) {
31237 GemmMicrokernelTester()
31238 .mr(4)
31239 .nr(2)
31240 .kr(4)
31241 .sr(1)
31242 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031243 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031244 .k(k)
31245 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31246 }
31247 }
31248 }
31249
31250 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2_strided_cn) {
31251 for (uint32_t n = 4; n <= 6; n += 2) {
31252 for (size_t k = 1; k <= 20; k += 5) {
31253 GemmMicrokernelTester()
31254 .mr(4)
31255 .nr(2)
31256 .kr(4)
31257 .sr(1)
31258 .m(4)
31259 .n(n)
31260 .k(k)
31261 .cn_stride(5)
31262 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31263 }
31264 }
31265 }
31266
31267 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2_subtile) {
31268 for (uint32_t n = 4; n <= 6; n += 2) {
31269 for (size_t k = 1; k <= 20; k += 5) {
31270 for (uint32_t m = 1; m <= 4; m++) {
31271 GemmMicrokernelTester()
31272 .mr(4)
31273 .nr(2)
31274 .kr(4)
31275 .sr(1)
31276 .m(m)
31277 .n(n)
31278 .k(k)
31279 .iterations(1)
31280 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31281 }
31282 }
31283 }
31284 }
31285
31286 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, small_kernel) {
31287 for (size_t k = 1; k <= 20; k += 5) {
31288 GemmMicrokernelTester()
31289 .mr(4)
31290 .nr(2)
31291 .kr(4)
31292 .sr(1)
31293 .m(4)
31294 .n(2)
31295 .k(k)
31296 .ks(3)
31297 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31298 }
31299 }
31300
31301 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, small_kernel_subtile) {
31302 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031303 for (uint32_t n = 1; n <= 2; n++) {
31304 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031305 GemmMicrokernelTester()
31306 .mr(4)
31307 .nr(2)
31308 .kr(4)
31309 .sr(1)
31310 .m(m)
31311 .n(n)
31312 .k(k)
31313 .ks(3)
31314 .iterations(1)
31315 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31316 }
31317 }
31318 }
31319 }
31320
31321 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_gt_2_small_kernel) {
31322 for (uint32_t n = 3; n < 4; n++) {
31323 for (size_t k = 1; k <= 20; k += 5) {
31324 GemmMicrokernelTester()
31325 .mr(4)
31326 .nr(2)
31327 .kr(4)
31328 .sr(1)
31329 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031330 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031331 .k(k)
31332 .ks(3)
31333 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31334 }
31335 }
31336 }
31337
31338 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, n_div_2_small_kernel) {
31339 for (uint32_t n = 4; n <= 6; n += 2) {
31340 for (size_t k = 1; k <= 20; k += 5) {
31341 GemmMicrokernelTester()
31342 .mr(4)
31343 .nr(2)
31344 .kr(4)
31345 .sr(1)
31346 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031347 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031348 .k(k)
31349 .ks(3)
31350 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31351 }
31352 }
31353 }
31354
31355 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, strided_cm_subtile) {
31356 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031357 for (uint32_t n = 1; n <= 2; n++) {
31358 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031359 GemmMicrokernelTester()
31360 .mr(4)
31361 .nr(2)
31362 .kr(4)
31363 .sr(1)
31364 .m(m)
31365 .n(n)
31366 .k(k)
31367 .cm_stride(5)
31368 .iterations(1)
31369 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31370 }
31371 }
31372 }
31373 }
31374
31375 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, a_offset) {
31376 for (size_t k = 1; k <= 20; k += 5) {
31377 GemmMicrokernelTester()
31378 .mr(4)
31379 .nr(2)
31380 .kr(4)
31381 .sr(1)
31382 .m(4)
31383 .n(2)
31384 .k(k)
31385 .ks(3)
31386 .a_offset(83)
31387 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31388 }
31389 }
31390
31391 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031392 for (size_t k = 1; k <= 20; k += 5) {
31393 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031394 GemmMicrokernelTester()
31395 .mr(4)
31396 .nr(2)
31397 .kr(4)
31398 .sr(1)
31399 .m(4)
31400 .n(2)
31401 .k(k)
31402 .ks(3)
31403 .a_offset(83)
31404 .zero_index(mz)
31405 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31406 }
31407 }
31408 }
31409
31410 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, qmin) {
31411 GemmMicrokernelTester()
31412 .mr(4)
31413 .nr(2)
31414 .kr(4)
31415 .sr(1)
31416 .m(4)
31417 .n(2)
31418 .k(4)
31419 .qmin(128)
31420 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31421 }
31422
31423 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, qmax) {
31424 GemmMicrokernelTester()
31425 .mr(4)
31426 .nr(2)
31427 .kr(4)
31428 .sr(1)
31429 .m(4)
31430 .n(2)
31431 .k(4)
31432 .qmax(128)
31433 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31434 }
31435
31436 TEST(F32_IGEMM_MINMAX_4X2C4__WASMSIMD_X86, strided_cm) {
31437 GemmMicrokernelTester()
31438 .mr(4)
31439 .nr(2)
31440 .kr(4)
31441 .sr(1)
31442 .m(4)
31443 .n(2)
31444 .k(4)
31445 .cm_stride(5)
31446 .Test(xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86, xnn_init_f32_minmax_wasmsimd_params);
31447 }
31448#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31449
31450
31451#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31452 TEST(F32_IGEMM_MINMAX_2X4__WASM, k_eq_1) {
31453 GemmMicrokernelTester()
31454 .mr(2)
31455 .nr(4)
31456 .kr(1)
31457 .sr(1)
31458 .m(2)
31459 .n(4)
31460 .k(1)
31461 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31462 }
31463
31464 TEST(F32_IGEMM_MINMAX_2X4__WASM, strided_cn) {
31465 GemmMicrokernelTester()
31466 .mr(2)
31467 .nr(4)
31468 .kr(1)
31469 .sr(1)
31470 .m(2)
31471 .n(4)
31472 .k(1)
31473 .cn_stride(7)
31474 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31475 }
31476
31477 TEST(F32_IGEMM_MINMAX_2X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031478 for (uint32_t n = 1; n <= 4; n++) {
31479 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031480 GemmMicrokernelTester()
31481 .mr(2)
31482 .nr(4)
31483 .kr(1)
31484 .sr(1)
31485 .m(m)
31486 .n(n)
31487 .k(1)
31488 .iterations(1)
31489 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31490 }
31491 }
31492 }
31493
31494 TEST(F32_IGEMM_MINMAX_2X4__WASM, k_eq_1_subtile_m) {
31495 for (uint32_t m = 1; m <= 2; m++) {
31496 GemmMicrokernelTester()
31497 .mr(2)
31498 .nr(4)
31499 .kr(1)
31500 .sr(1)
31501 .m(m)
31502 .n(4)
31503 .k(1)
31504 .iterations(1)
31505 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31506 }
31507 }
31508
31509 TEST(F32_IGEMM_MINMAX_2X4__WASM, k_eq_1_subtile_n) {
31510 for (uint32_t n = 1; n <= 4; n++) {
31511 GemmMicrokernelTester()
31512 .mr(2)
31513 .nr(4)
31514 .kr(1)
31515 .sr(1)
31516 .m(2)
31517 .n(n)
31518 .k(1)
31519 .iterations(1)
31520 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31521 }
31522 }
31523
31524 TEST(F32_IGEMM_MINMAX_2X4__WASM, k_gt_1) {
31525 for (size_t k = 2; k < 10; k++) {
31526 GemmMicrokernelTester()
31527 .mr(2)
31528 .nr(4)
31529 .kr(1)
31530 .sr(1)
31531 .m(2)
31532 .n(4)
31533 .k(k)
31534 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31535 }
31536 }
31537
31538 TEST(F32_IGEMM_MINMAX_2X4__WASM, k_gt_1_subtile) {
31539 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031540 for (uint32_t n = 1; n <= 4; n++) {
31541 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031542 GemmMicrokernelTester()
31543 .mr(2)
31544 .nr(4)
31545 .kr(1)
31546 .sr(1)
31547 .m(m)
31548 .n(n)
31549 .k(k)
31550 .iterations(1)
31551 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31552 }
31553 }
31554 }
31555 }
31556
31557 TEST(F32_IGEMM_MINMAX_2X4__WASM, n_gt_4) {
31558 for (uint32_t n = 5; n < 8; n++) {
31559 for (size_t k = 1; k <= 5; k += 2) {
31560 GemmMicrokernelTester()
31561 .mr(2)
31562 .nr(4)
31563 .kr(1)
31564 .sr(1)
31565 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031566 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031567 .k(k)
31568 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31569 }
31570 }
31571 }
31572
31573 TEST(F32_IGEMM_MINMAX_2X4__WASM, n_gt_4_strided_cn) {
31574 for (uint32_t n = 5; n < 8; n++) {
31575 for (size_t k = 1; k <= 5; k += 2) {
31576 GemmMicrokernelTester()
31577 .mr(2)
31578 .nr(4)
31579 .kr(1)
31580 .sr(1)
31581 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031582 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031583 .k(k)
31584 .cn_stride(7)
31585 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31586 }
31587 }
31588 }
31589
31590 TEST(F32_IGEMM_MINMAX_2X4__WASM, n_gt_4_subtile) {
31591 for (uint32_t n = 5; n < 8; n++) {
31592 for (size_t k = 1; k <= 5; k += 2) {
31593 for (uint32_t m = 1; m <= 2; m++) {
31594 GemmMicrokernelTester()
31595 .mr(2)
31596 .nr(4)
31597 .kr(1)
31598 .sr(1)
31599 .m(m)
31600 .n(n)
31601 .k(k)
31602 .iterations(1)
31603 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31604 }
31605 }
31606 }
31607 }
31608
31609 TEST(F32_IGEMM_MINMAX_2X4__WASM, n_div_4) {
31610 for (uint32_t n = 8; n <= 12; n += 4) {
31611 for (size_t k = 1; k <= 5; k += 2) {
31612 GemmMicrokernelTester()
31613 .mr(2)
31614 .nr(4)
31615 .kr(1)
31616 .sr(1)
31617 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031618 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031619 .k(k)
31620 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31621 }
31622 }
31623 }
31624
31625 TEST(F32_IGEMM_MINMAX_2X4__WASM, n_div_4_strided_cn) {
31626 for (uint32_t n = 8; n <= 12; n += 4) {
31627 for (size_t k = 1; k <= 5; k += 2) {
31628 GemmMicrokernelTester()
31629 .mr(2)
31630 .nr(4)
31631 .kr(1)
31632 .sr(1)
31633 .m(2)
31634 .n(n)
31635 .k(k)
31636 .cn_stride(7)
31637 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31638 }
31639 }
31640 }
31641
31642 TEST(F32_IGEMM_MINMAX_2X4__WASM, n_div_4_subtile) {
31643 for (uint32_t n = 8; n <= 12; n += 4) {
31644 for (size_t k = 1; k <= 5; k += 2) {
31645 for (uint32_t m = 1; m <= 2; m++) {
31646 GemmMicrokernelTester()
31647 .mr(2)
31648 .nr(4)
31649 .kr(1)
31650 .sr(1)
31651 .m(m)
31652 .n(n)
31653 .k(k)
31654 .iterations(1)
31655 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31656 }
31657 }
31658 }
31659 }
31660
31661 TEST(F32_IGEMM_MINMAX_2X4__WASM, small_kernel) {
31662 for (size_t k = 1; k <= 5; k += 2) {
31663 GemmMicrokernelTester()
31664 .mr(2)
31665 .nr(4)
31666 .kr(1)
31667 .sr(1)
31668 .m(2)
31669 .n(4)
31670 .k(k)
31671 .ks(3)
31672 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31673 }
31674 }
31675
31676 TEST(F32_IGEMM_MINMAX_2X4__WASM, small_kernel_subtile) {
31677 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031678 for (uint32_t n = 1; n <= 4; n++) {
31679 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031680 GemmMicrokernelTester()
31681 .mr(2)
31682 .nr(4)
31683 .kr(1)
31684 .sr(1)
31685 .m(m)
31686 .n(n)
31687 .k(k)
31688 .ks(3)
31689 .iterations(1)
31690 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31691 }
31692 }
31693 }
31694 }
31695
31696 TEST(F32_IGEMM_MINMAX_2X4__WASM, n_gt_4_small_kernel) {
31697 for (uint32_t n = 5; n < 8; n++) {
31698 for (size_t k = 1; k <= 5; k += 2) {
31699 GemmMicrokernelTester()
31700 .mr(2)
31701 .nr(4)
31702 .kr(1)
31703 .sr(1)
31704 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031705 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031706 .k(k)
31707 .ks(3)
31708 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31709 }
31710 }
31711 }
31712
31713 TEST(F32_IGEMM_MINMAX_2X4__WASM, n_div_4_small_kernel) {
31714 for (uint32_t n = 8; n <= 12; n += 4) {
31715 for (size_t k = 1; k <= 5; k += 2) {
31716 GemmMicrokernelTester()
31717 .mr(2)
31718 .nr(4)
31719 .kr(1)
31720 .sr(1)
31721 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031722 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031723 .k(k)
31724 .ks(3)
31725 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31726 }
31727 }
31728 }
31729
31730 TEST(F32_IGEMM_MINMAX_2X4__WASM, strided_cm_subtile) {
31731 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031732 for (uint32_t n = 1; n <= 4; n++) {
31733 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031734 GemmMicrokernelTester()
31735 .mr(2)
31736 .nr(4)
31737 .kr(1)
31738 .sr(1)
31739 .m(m)
31740 .n(n)
31741 .k(k)
31742 .cm_stride(7)
31743 .iterations(1)
31744 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31745 }
31746 }
31747 }
31748 }
31749
31750 TEST(F32_IGEMM_MINMAX_2X4__WASM, a_offset) {
31751 for (size_t k = 1; k <= 5; k += 2) {
31752 GemmMicrokernelTester()
31753 .mr(2)
31754 .nr(4)
31755 .kr(1)
31756 .sr(1)
31757 .m(2)
31758 .n(4)
31759 .k(k)
31760 .ks(3)
31761 .a_offset(13)
31762 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31763 }
31764 }
31765
31766 TEST(F32_IGEMM_MINMAX_2X4__WASM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031767 for (size_t k = 1; k <= 5; k += 2) {
31768 for (uint32_t mz = 0; mz < 2; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031769 GemmMicrokernelTester()
31770 .mr(2)
31771 .nr(4)
31772 .kr(1)
31773 .sr(1)
31774 .m(2)
31775 .n(4)
31776 .k(k)
31777 .ks(3)
31778 .a_offset(13)
31779 .zero_index(mz)
31780 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31781 }
31782 }
31783 }
31784
31785 TEST(F32_IGEMM_MINMAX_2X4__WASM, qmin) {
31786 GemmMicrokernelTester()
31787 .mr(2)
31788 .nr(4)
31789 .kr(1)
31790 .sr(1)
31791 .m(2)
31792 .n(4)
31793 .k(1)
31794 .qmin(128)
31795 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31796 }
31797
31798 TEST(F32_IGEMM_MINMAX_2X4__WASM, qmax) {
31799 GemmMicrokernelTester()
31800 .mr(2)
31801 .nr(4)
31802 .kr(1)
31803 .sr(1)
31804 .m(2)
31805 .n(4)
31806 .k(1)
31807 .qmax(128)
31808 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31809 }
31810
31811 TEST(F32_IGEMM_MINMAX_2X4__WASM, strided_cm) {
31812 GemmMicrokernelTester()
31813 .mr(2)
31814 .nr(4)
31815 .kr(1)
31816 .sr(1)
31817 .m(2)
31818 .n(4)
31819 .k(1)
31820 .cm_stride(7)
31821 .Test(xnn_f32_igemm_minmax_ukernel_2x4__wasm, xnn_init_f32_minmax_scalar_params);
31822 }
31823#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31824
31825
31826#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31827 TEST(F32_IGEMM_MINMAX_4X4__WASM, k_eq_1) {
31828 GemmMicrokernelTester()
31829 .mr(4)
31830 .nr(4)
31831 .kr(1)
31832 .sr(1)
31833 .m(4)
31834 .n(4)
31835 .k(1)
31836 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
31837 }
31838
31839 TEST(F32_IGEMM_MINMAX_4X4__WASM, strided_cn) {
31840 GemmMicrokernelTester()
31841 .mr(4)
31842 .nr(4)
31843 .kr(1)
31844 .sr(1)
31845 .m(4)
31846 .n(4)
31847 .k(1)
31848 .cn_stride(7)
31849 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
31850 }
31851
31852 TEST(F32_IGEMM_MINMAX_4X4__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031853 for (uint32_t n = 1; n <= 4; n++) {
31854 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031855 GemmMicrokernelTester()
31856 .mr(4)
31857 .nr(4)
31858 .kr(1)
31859 .sr(1)
31860 .m(m)
31861 .n(n)
31862 .k(1)
31863 .iterations(1)
31864 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
31865 }
31866 }
31867 }
31868
31869 TEST(F32_IGEMM_MINMAX_4X4__WASM, k_eq_1_subtile_m) {
31870 for (uint32_t m = 1; m <= 4; m++) {
31871 GemmMicrokernelTester()
31872 .mr(4)
31873 .nr(4)
31874 .kr(1)
31875 .sr(1)
31876 .m(m)
31877 .n(4)
31878 .k(1)
31879 .iterations(1)
31880 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
31881 }
31882 }
31883
31884 TEST(F32_IGEMM_MINMAX_4X4__WASM, k_eq_1_subtile_n) {
31885 for (uint32_t n = 1; n <= 4; n++) {
31886 GemmMicrokernelTester()
31887 .mr(4)
31888 .nr(4)
31889 .kr(1)
31890 .sr(1)
31891 .m(4)
31892 .n(n)
31893 .k(1)
31894 .iterations(1)
31895 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
31896 }
31897 }
31898
31899 TEST(F32_IGEMM_MINMAX_4X4__WASM, k_gt_1) {
31900 for (size_t k = 2; k < 10; k++) {
31901 GemmMicrokernelTester()
31902 .mr(4)
31903 .nr(4)
31904 .kr(1)
31905 .sr(1)
31906 .m(4)
31907 .n(4)
31908 .k(k)
31909 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
31910 }
31911 }
31912
31913 TEST(F32_IGEMM_MINMAX_4X4__WASM, k_gt_1_subtile) {
31914 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031915 for (uint32_t n = 1; n <= 4; n++) {
31916 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031917 GemmMicrokernelTester()
31918 .mr(4)
31919 .nr(4)
31920 .kr(1)
31921 .sr(1)
31922 .m(m)
31923 .n(n)
31924 .k(k)
31925 .iterations(1)
31926 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
31927 }
31928 }
31929 }
31930 }
31931
31932 TEST(F32_IGEMM_MINMAX_4X4__WASM, n_gt_4) {
31933 for (uint32_t n = 5; n < 8; n++) {
31934 for (size_t k = 1; k <= 5; k += 2) {
31935 GemmMicrokernelTester()
31936 .mr(4)
31937 .nr(4)
31938 .kr(1)
31939 .sr(1)
31940 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031941 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031942 .k(k)
31943 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
31944 }
31945 }
31946 }
31947
31948 TEST(F32_IGEMM_MINMAX_4X4__WASM, n_gt_4_strided_cn) {
31949 for (uint32_t n = 5; n < 8; n++) {
31950 for (size_t k = 1; k <= 5; k += 2) {
31951 GemmMicrokernelTester()
31952 .mr(4)
31953 .nr(4)
31954 .kr(1)
31955 .sr(1)
31956 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031957 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031958 .k(k)
31959 .cn_stride(7)
31960 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
31961 }
31962 }
31963 }
31964
31965 TEST(F32_IGEMM_MINMAX_4X4__WASM, n_gt_4_subtile) {
31966 for (uint32_t n = 5; n < 8; n++) {
31967 for (size_t k = 1; k <= 5; k += 2) {
31968 for (uint32_t m = 1; m <= 4; m++) {
31969 GemmMicrokernelTester()
31970 .mr(4)
31971 .nr(4)
31972 .kr(1)
31973 .sr(1)
31974 .m(m)
31975 .n(n)
31976 .k(k)
31977 .iterations(1)
31978 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
31979 }
31980 }
31981 }
31982 }
31983
31984 TEST(F32_IGEMM_MINMAX_4X4__WASM, n_div_4) {
31985 for (uint32_t n = 8; n <= 12; n += 4) {
31986 for (size_t k = 1; k <= 5; k += 2) {
31987 GemmMicrokernelTester()
31988 .mr(4)
31989 .nr(4)
31990 .kr(1)
31991 .sr(1)
31992 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031993 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080031994 .k(k)
31995 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
31996 }
31997 }
31998 }
31999
32000 TEST(F32_IGEMM_MINMAX_4X4__WASM, n_div_4_strided_cn) {
32001 for (uint32_t n = 8; n <= 12; n += 4) {
32002 for (size_t k = 1; k <= 5; k += 2) {
32003 GemmMicrokernelTester()
32004 .mr(4)
32005 .nr(4)
32006 .kr(1)
32007 .sr(1)
32008 .m(4)
32009 .n(n)
32010 .k(k)
32011 .cn_stride(7)
32012 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32013 }
32014 }
32015 }
32016
32017 TEST(F32_IGEMM_MINMAX_4X4__WASM, n_div_4_subtile) {
32018 for (uint32_t n = 8; n <= 12; n += 4) {
32019 for (size_t k = 1; k <= 5; k += 2) {
32020 for (uint32_t m = 1; m <= 4; m++) {
32021 GemmMicrokernelTester()
32022 .mr(4)
32023 .nr(4)
32024 .kr(1)
32025 .sr(1)
32026 .m(m)
32027 .n(n)
32028 .k(k)
32029 .iterations(1)
32030 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32031 }
32032 }
32033 }
32034 }
32035
32036 TEST(F32_IGEMM_MINMAX_4X4__WASM, small_kernel) {
32037 for (size_t k = 1; k <= 5; k += 2) {
32038 GemmMicrokernelTester()
32039 .mr(4)
32040 .nr(4)
32041 .kr(1)
32042 .sr(1)
32043 .m(4)
32044 .n(4)
32045 .k(k)
32046 .ks(3)
32047 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32048 }
32049 }
32050
32051 TEST(F32_IGEMM_MINMAX_4X4__WASM, small_kernel_subtile) {
32052 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032053 for (uint32_t n = 1; n <= 4; n++) {
32054 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032055 GemmMicrokernelTester()
32056 .mr(4)
32057 .nr(4)
32058 .kr(1)
32059 .sr(1)
32060 .m(m)
32061 .n(n)
32062 .k(k)
32063 .ks(3)
32064 .iterations(1)
32065 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32066 }
32067 }
32068 }
32069 }
32070
32071 TEST(F32_IGEMM_MINMAX_4X4__WASM, n_gt_4_small_kernel) {
32072 for (uint32_t n = 5; n < 8; n++) {
32073 for (size_t k = 1; k <= 5; k += 2) {
32074 GemmMicrokernelTester()
32075 .mr(4)
32076 .nr(4)
32077 .kr(1)
32078 .sr(1)
32079 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032080 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032081 .k(k)
32082 .ks(3)
32083 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32084 }
32085 }
32086 }
32087
32088 TEST(F32_IGEMM_MINMAX_4X4__WASM, n_div_4_small_kernel) {
32089 for (uint32_t n = 8; n <= 12; n += 4) {
32090 for (size_t k = 1; k <= 5; k += 2) {
32091 GemmMicrokernelTester()
32092 .mr(4)
32093 .nr(4)
32094 .kr(1)
32095 .sr(1)
32096 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032097 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032098 .k(k)
32099 .ks(3)
32100 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32101 }
32102 }
32103 }
32104
32105 TEST(F32_IGEMM_MINMAX_4X4__WASM, strided_cm_subtile) {
32106 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032107 for (uint32_t n = 1; n <= 4; n++) {
32108 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032109 GemmMicrokernelTester()
32110 .mr(4)
32111 .nr(4)
32112 .kr(1)
32113 .sr(1)
32114 .m(m)
32115 .n(n)
32116 .k(k)
32117 .cm_stride(7)
32118 .iterations(1)
32119 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32120 }
32121 }
32122 }
32123 }
32124
32125 TEST(F32_IGEMM_MINMAX_4X4__WASM, a_offset) {
32126 for (size_t k = 1; k <= 5; k += 2) {
32127 GemmMicrokernelTester()
32128 .mr(4)
32129 .nr(4)
32130 .kr(1)
32131 .sr(1)
32132 .m(4)
32133 .n(4)
32134 .k(k)
32135 .ks(3)
32136 .a_offset(23)
32137 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32138 }
32139 }
32140
32141 TEST(F32_IGEMM_MINMAX_4X4__WASM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032142 for (size_t k = 1; k <= 5; k += 2) {
32143 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032144 GemmMicrokernelTester()
32145 .mr(4)
32146 .nr(4)
32147 .kr(1)
32148 .sr(1)
32149 .m(4)
32150 .n(4)
32151 .k(k)
32152 .ks(3)
32153 .a_offset(23)
32154 .zero_index(mz)
32155 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32156 }
32157 }
32158 }
32159
32160 TEST(F32_IGEMM_MINMAX_4X4__WASM, qmin) {
32161 GemmMicrokernelTester()
32162 .mr(4)
32163 .nr(4)
32164 .kr(1)
32165 .sr(1)
32166 .m(4)
32167 .n(4)
32168 .k(1)
32169 .qmin(128)
32170 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32171 }
32172
32173 TEST(F32_IGEMM_MINMAX_4X4__WASM, qmax) {
32174 GemmMicrokernelTester()
32175 .mr(4)
32176 .nr(4)
32177 .kr(1)
32178 .sr(1)
32179 .m(4)
32180 .n(4)
32181 .k(1)
32182 .qmax(128)
32183 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32184 }
32185
32186 TEST(F32_IGEMM_MINMAX_4X4__WASM, strided_cm) {
32187 GemmMicrokernelTester()
32188 .mr(4)
32189 .nr(4)
32190 .kr(1)
32191 .sr(1)
32192 .m(4)
32193 .n(4)
32194 .k(1)
32195 .cm_stride(7)
32196 .Test(xnn_f32_igemm_minmax_ukernel_4x4__wasm, xnn_init_f32_minmax_scalar_params);
32197 }
32198#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32199
32200
32201#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32202 TEST(F32_IGEMM_MINMAX_4X2__WASM, k_eq_1) {
32203 GemmMicrokernelTester()
32204 .mr(4)
32205 .nr(2)
32206 .kr(1)
32207 .sr(1)
32208 .m(4)
32209 .n(2)
32210 .k(1)
32211 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32212 }
32213
32214 TEST(F32_IGEMM_MINMAX_4X2__WASM, strided_cn) {
32215 GemmMicrokernelTester()
32216 .mr(4)
32217 .nr(2)
32218 .kr(1)
32219 .sr(1)
32220 .m(4)
32221 .n(2)
32222 .k(1)
32223 .cn_stride(5)
32224 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32225 }
32226
32227 TEST(F32_IGEMM_MINMAX_4X2__WASM, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032228 for (uint32_t n = 1; n <= 2; n++) {
32229 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032230 GemmMicrokernelTester()
32231 .mr(4)
32232 .nr(2)
32233 .kr(1)
32234 .sr(1)
32235 .m(m)
32236 .n(n)
32237 .k(1)
32238 .iterations(1)
32239 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32240 }
32241 }
32242 }
32243
32244 TEST(F32_IGEMM_MINMAX_4X2__WASM, k_eq_1_subtile_m) {
32245 for (uint32_t m = 1; m <= 4; m++) {
32246 GemmMicrokernelTester()
32247 .mr(4)
32248 .nr(2)
32249 .kr(1)
32250 .sr(1)
32251 .m(m)
32252 .n(2)
32253 .k(1)
32254 .iterations(1)
32255 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32256 }
32257 }
32258
32259 TEST(F32_IGEMM_MINMAX_4X2__WASM, k_eq_1_subtile_n) {
32260 for (uint32_t n = 1; n <= 2; n++) {
32261 GemmMicrokernelTester()
32262 .mr(4)
32263 .nr(2)
32264 .kr(1)
32265 .sr(1)
32266 .m(4)
32267 .n(n)
32268 .k(1)
32269 .iterations(1)
32270 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32271 }
32272 }
32273
32274 TEST(F32_IGEMM_MINMAX_4X2__WASM, k_gt_1) {
32275 for (size_t k = 2; k < 10; k++) {
32276 GemmMicrokernelTester()
32277 .mr(4)
32278 .nr(2)
32279 .kr(1)
32280 .sr(1)
32281 .m(4)
32282 .n(2)
32283 .k(k)
32284 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32285 }
32286 }
32287
32288 TEST(F32_IGEMM_MINMAX_4X2__WASM, k_gt_1_subtile) {
32289 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032290 for (uint32_t n = 1; n <= 2; n++) {
32291 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032292 GemmMicrokernelTester()
32293 .mr(4)
32294 .nr(2)
32295 .kr(1)
32296 .sr(1)
32297 .m(m)
32298 .n(n)
32299 .k(k)
32300 .iterations(1)
32301 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32302 }
32303 }
32304 }
32305 }
32306
32307 TEST(F32_IGEMM_MINMAX_4X2__WASM, n_gt_2) {
32308 for (uint32_t n = 3; n < 4; n++) {
32309 for (size_t k = 1; k <= 5; k += 2) {
32310 GemmMicrokernelTester()
32311 .mr(4)
32312 .nr(2)
32313 .kr(1)
32314 .sr(1)
32315 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032316 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032317 .k(k)
32318 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32319 }
32320 }
32321 }
32322
32323 TEST(F32_IGEMM_MINMAX_4X2__WASM, n_gt_2_strided_cn) {
32324 for (uint32_t n = 3; n < 4; n++) {
32325 for (size_t k = 1; k <= 5; k += 2) {
32326 GemmMicrokernelTester()
32327 .mr(4)
32328 .nr(2)
32329 .kr(1)
32330 .sr(1)
32331 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032332 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032333 .k(k)
32334 .cn_stride(5)
32335 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32336 }
32337 }
32338 }
32339
32340 TEST(F32_IGEMM_MINMAX_4X2__WASM, n_gt_2_subtile) {
32341 for (uint32_t n = 3; n < 4; n++) {
32342 for (size_t k = 1; k <= 5; k += 2) {
32343 for (uint32_t m = 1; m <= 4; m++) {
32344 GemmMicrokernelTester()
32345 .mr(4)
32346 .nr(2)
32347 .kr(1)
32348 .sr(1)
32349 .m(m)
32350 .n(n)
32351 .k(k)
32352 .iterations(1)
32353 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32354 }
32355 }
32356 }
32357 }
32358
32359 TEST(F32_IGEMM_MINMAX_4X2__WASM, n_div_2) {
32360 for (uint32_t n = 4; n <= 6; n += 2) {
32361 for (size_t k = 1; k <= 5; k += 2) {
32362 GemmMicrokernelTester()
32363 .mr(4)
32364 .nr(2)
32365 .kr(1)
32366 .sr(1)
32367 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032368 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032369 .k(k)
32370 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32371 }
32372 }
32373 }
32374
32375 TEST(F32_IGEMM_MINMAX_4X2__WASM, n_div_2_strided_cn) {
32376 for (uint32_t n = 4; n <= 6; n += 2) {
32377 for (size_t k = 1; k <= 5; k += 2) {
32378 GemmMicrokernelTester()
32379 .mr(4)
32380 .nr(2)
32381 .kr(1)
32382 .sr(1)
32383 .m(4)
32384 .n(n)
32385 .k(k)
32386 .cn_stride(5)
32387 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32388 }
32389 }
32390 }
32391
32392 TEST(F32_IGEMM_MINMAX_4X2__WASM, n_div_2_subtile) {
32393 for (uint32_t n = 4; n <= 6; n += 2) {
32394 for (size_t k = 1; k <= 5; k += 2) {
32395 for (uint32_t m = 1; m <= 4; m++) {
32396 GemmMicrokernelTester()
32397 .mr(4)
32398 .nr(2)
32399 .kr(1)
32400 .sr(1)
32401 .m(m)
32402 .n(n)
32403 .k(k)
32404 .iterations(1)
32405 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32406 }
32407 }
32408 }
32409 }
32410
32411 TEST(F32_IGEMM_MINMAX_4X2__WASM, small_kernel) {
32412 for (size_t k = 1; k <= 5; k += 2) {
32413 GemmMicrokernelTester()
32414 .mr(4)
32415 .nr(2)
32416 .kr(1)
32417 .sr(1)
32418 .m(4)
32419 .n(2)
32420 .k(k)
32421 .ks(3)
32422 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32423 }
32424 }
32425
32426 TEST(F32_IGEMM_MINMAX_4X2__WASM, small_kernel_subtile) {
32427 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032428 for (uint32_t n = 1; n <= 2; n++) {
32429 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032430 GemmMicrokernelTester()
32431 .mr(4)
32432 .nr(2)
32433 .kr(1)
32434 .sr(1)
32435 .m(m)
32436 .n(n)
32437 .k(k)
32438 .ks(3)
32439 .iterations(1)
32440 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32441 }
32442 }
32443 }
32444 }
32445
32446 TEST(F32_IGEMM_MINMAX_4X2__WASM, n_gt_2_small_kernel) {
32447 for (uint32_t n = 3; n < 4; n++) {
32448 for (size_t k = 1; k <= 5; k += 2) {
32449 GemmMicrokernelTester()
32450 .mr(4)
32451 .nr(2)
32452 .kr(1)
32453 .sr(1)
32454 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032455 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032456 .k(k)
32457 .ks(3)
32458 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32459 }
32460 }
32461 }
32462
32463 TEST(F32_IGEMM_MINMAX_4X2__WASM, n_div_2_small_kernel) {
32464 for (uint32_t n = 4; n <= 6; n += 2) {
32465 for (size_t k = 1; k <= 5; k += 2) {
32466 GemmMicrokernelTester()
32467 .mr(4)
32468 .nr(2)
32469 .kr(1)
32470 .sr(1)
32471 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032472 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032473 .k(k)
32474 .ks(3)
32475 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32476 }
32477 }
32478 }
32479
32480 TEST(F32_IGEMM_MINMAX_4X2__WASM, strided_cm_subtile) {
32481 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032482 for (uint32_t n = 1; n <= 2; n++) {
32483 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032484 GemmMicrokernelTester()
32485 .mr(4)
32486 .nr(2)
32487 .kr(1)
32488 .sr(1)
32489 .m(m)
32490 .n(n)
32491 .k(k)
32492 .cm_stride(5)
32493 .iterations(1)
32494 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32495 }
32496 }
32497 }
32498 }
32499
32500 TEST(F32_IGEMM_MINMAX_4X2__WASM, a_offset) {
32501 for (size_t k = 1; k <= 5; k += 2) {
32502 GemmMicrokernelTester()
32503 .mr(4)
32504 .nr(2)
32505 .kr(1)
32506 .sr(1)
32507 .m(4)
32508 .n(2)
32509 .k(k)
32510 .ks(3)
32511 .a_offset(23)
32512 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32513 }
32514 }
32515
32516 TEST(F32_IGEMM_MINMAX_4X2__WASM, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032517 for (size_t k = 1; k <= 5; k += 2) {
32518 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032519 GemmMicrokernelTester()
32520 .mr(4)
32521 .nr(2)
32522 .kr(1)
32523 .sr(1)
32524 .m(4)
32525 .n(2)
32526 .k(k)
32527 .ks(3)
32528 .a_offset(23)
32529 .zero_index(mz)
32530 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32531 }
32532 }
32533 }
32534
32535 TEST(F32_IGEMM_MINMAX_4X2__WASM, qmin) {
32536 GemmMicrokernelTester()
32537 .mr(4)
32538 .nr(2)
32539 .kr(1)
32540 .sr(1)
32541 .m(4)
32542 .n(2)
32543 .k(1)
32544 .qmin(128)
32545 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32546 }
32547
32548 TEST(F32_IGEMM_MINMAX_4X2__WASM, qmax) {
32549 GemmMicrokernelTester()
32550 .mr(4)
32551 .nr(2)
32552 .kr(1)
32553 .sr(1)
32554 .m(4)
32555 .n(2)
32556 .k(1)
32557 .qmax(128)
32558 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32559 }
32560
32561 TEST(F32_IGEMM_MINMAX_4X2__WASM, strided_cm) {
32562 GemmMicrokernelTester()
32563 .mr(4)
32564 .nr(2)
32565 .kr(1)
32566 .sr(1)
32567 .m(4)
32568 .n(2)
32569 .k(1)
32570 .cm_stride(5)
32571 .Test(xnn_f32_igemm_minmax_ukernel_4x2__wasm, xnn_init_f32_minmax_scalar_params);
32572 }
32573#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32574
32575
32576TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_eq_1) {
32577 GemmMicrokernelTester()
32578 .mr(1)
32579 .nr(4)
32580 .kr(1)
32581 .sr(1)
32582 .m(1)
32583 .n(4)
32584 .k(1)
32585 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32586}
32587
32588TEST(F32_IGEMM_MINMAX_1X4__SCALAR, strided_cn) {
32589 GemmMicrokernelTester()
32590 .mr(1)
32591 .nr(4)
32592 .kr(1)
32593 .sr(1)
32594 .m(1)
32595 .n(4)
32596 .k(1)
32597 .cn_stride(7)
32598 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32599}
32600
32601TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032602 for (uint32_t n = 1; n <= 4; n++) {
32603 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032604 GemmMicrokernelTester()
32605 .mr(1)
32606 .nr(4)
32607 .kr(1)
32608 .sr(1)
32609 .m(m)
32610 .n(n)
32611 .k(1)
32612 .iterations(1)
32613 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32614 }
32615 }
32616}
32617
32618TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile_m) {
32619 for (uint32_t m = 1; m <= 1; m++) {
32620 GemmMicrokernelTester()
32621 .mr(1)
32622 .nr(4)
32623 .kr(1)
32624 .sr(1)
32625 .m(m)
32626 .n(4)
32627 .k(1)
32628 .iterations(1)
32629 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32630 }
32631}
32632
32633TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile_n) {
32634 for (uint32_t n = 1; n <= 4; n++) {
32635 GemmMicrokernelTester()
32636 .mr(1)
32637 .nr(4)
32638 .kr(1)
32639 .sr(1)
32640 .m(1)
32641 .n(n)
32642 .k(1)
32643 .iterations(1)
32644 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32645 }
32646}
32647
32648TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_gt_1) {
32649 for (size_t k = 2; k < 10; k++) {
32650 GemmMicrokernelTester()
32651 .mr(1)
32652 .nr(4)
32653 .kr(1)
32654 .sr(1)
32655 .m(1)
32656 .n(4)
32657 .k(k)
32658 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32659 }
32660}
32661
32662TEST(F32_IGEMM_MINMAX_1X4__SCALAR, k_gt_1_subtile) {
32663 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032664 for (uint32_t n = 1; n <= 4; n++) {
32665 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032666 GemmMicrokernelTester()
32667 .mr(1)
32668 .nr(4)
32669 .kr(1)
32670 .sr(1)
32671 .m(m)
32672 .n(n)
32673 .k(k)
32674 .iterations(1)
32675 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32676 }
32677 }
32678 }
32679}
32680
32681TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_gt_4) {
32682 for (uint32_t n = 5; n < 8; n++) {
32683 for (size_t k = 1; k <= 5; k += 2) {
32684 GemmMicrokernelTester()
32685 .mr(1)
32686 .nr(4)
32687 .kr(1)
32688 .sr(1)
32689 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032690 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032691 .k(k)
32692 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32693 }
32694 }
32695}
32696
32697TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_gt_4_strided_cn) {
32698 for (uint32_t n = 5; n < 8; n++) {
32699 for (size_t k = 1; k <= 5; k += 2) {
32700 GemmMicrokernelTester()
32701 .mr(1)
32702 .nr(4)
32703 .kr(1)
32704 .sr(1)
32705 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032706 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032707 .k(k)
32708 .cn_stride(7)
32709 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32710 }
32711 }
32712}
32713
32714TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_gt_4_subtile) {
32715 for (uint32_t n = 5; n < 8; n++) {
32716 for (size_t k = 1; k <= 5; k += 2) {
32717 for (uint32_t m = 1; m <= 1; m++) {
32718 GemmMicrokernelTester()
32719 .mr(1)
32720 .nr(4)
32721 .kr(1)
32722 .sr(1)
32723 .m(m)
32724 .n(n)
32725 .k(k)
32726 .iterations(1)
32727 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32728 }
32729 }
32730 }
32731}
32732
32733TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_div_4) {
32734 for (uint32_t n = 8; n <= 12; n += 4) {
32735 for (size_t k = 1; k <= 5; k += 2) {
32736 GemmMicrokernelTester()
32737 .mr(1)
32738 .nr(4)
32739 .kr(1)
32740 .sr(1)
32741 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032742 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032743 .k(k)
32744 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32745 }
32746 }
32747}
32748
32749TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_div_4_strided_cn) {
32750 for (uint32_t n = 8; n <= 12; n += 4) {
32751 for (size_t k = 1; k <= 5; k += 2) {
32752 GemmMicrokernelTester()
32753 .mr(1)
32754 .nr(4)
32755 .kr(1)
32756 .sr(1)
32757 .m(1)
32758 .n(n)
32759 .k(k)
32760 .cn_stride(7)
32761 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32762 }
32763 }
32764}
32765
32766TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_div_4_subtile) {
32767 for (uint32_t n = 8; n <= 12; n += 4) {
32768 for (size_t k = 1; k <= 5; k += 2) {
32769 for (uint32_t m = 1; m <= 1; m++) {
32770 GemmMicrokernelTester()
32771 .mr(1)
32772 .nr(4)
32773 .kr(1)
32774 .sr(1)
32775 .m(m)
32776 .n(n)
32777 .k(k)
32778 .iterations(1)
32779 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32780 }
32781 }
32782 }
32783}
32784
32785TEST(F32_IGEMM_MINMAX_1X4__SCALAR, small_kernel) {
32786 for (size_t k = 1; k <= 5; k += 2) {
32787 GemmMicrokernelTester()
32788 .mr(1)
32789 .nr(4)
32790 .kr(1)
32791 .sr(1)
32792 .m(1)
32793 .n(4)
32794 .k(k)
32795 .ks(3)
32796 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32797 }
32798}
32799
32800TEST(F32_IGEMM_MINMAX_1X4__SCALAR, small_kernel_subtile) {
32801 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032802 for (uint32_t n = 1; n <= 4; n++) {
32803 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032804 GemmMicrokernelTester()
32805 .mr(1)
32806 .nr(4)
32807 .kr(1)
32808 .sr(1)
32809 .m(m)
32810 .n(n)
32811 .k(k)
32812 .ks(3)
32813 .iterations(1)
32814 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32815 }
32816 }
32817 }
32818}
32819
32820TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_gt_4_small_kernel) {
32821 for (uint32_t n = 5; n < 8; n++) {
32822 for (size_t k = 1; k <= 5; k += 2) {
32823 GemmMicrokernelTester()
32824 .mr(1)
32825 .nr(4)
32826 .kr(1)
32827 .sr(1)
32828 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032829 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032830 .k(k)
32831 .ks(3)
32832 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32833 }
32834 }
32835}
32836
32837TEST(F32_IGEMM_MINMAX_1X4__SCALAR, n_div_4_small_kernel) {
32838 for (uint32_t n = 8; n <= 12; n += 4) {
32839 for (size_t k = 1; k <= 5; k += 2) {
32840 GemmMicrokernelTester()
32841 .mr(1)
32842 .nr(4)
32843 .kr(1)
32844 .sr(1)
32845 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032846 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032847 .k(k)
32848 .ks(3)
32849 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32850 }
32851 }
32852}
32853
32854TEST(F32_IGEMM_MINMAX_1X4__SCALAR, strided_cm_subtile) {
32855 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032856 for (uint32_t n = 1; n <= 4; n++) {
32857 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032858 GemmMicrokernelTester()
32859 .mr(1)
32860 .nr(4)
32861 .kr(1)
32862 .sr(1)
32863 .m(m)
32864 .n(n)
32865 .k(k)
32866 .cm_stride(7)
32867 .iterations(1)
32868 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32869 }
32870 }
32871 }
32872}
32873
32874TEST(F32_IGEMM_MINMAX_1X4__SCALAR, a_offset) {
32875 for (size_t k = 1; k <= 5; k += 2) {
32876 GemmMicrokernelTester()
32877 .mr(1)
32878 .nr(4)
32879 .kr(1)
32880 .sr(1)
32881 .m(1)
32882 .n(4)
32883 .k(k)
32884 .ks(3)
32885 .a_offset(7)
32886 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32887 }
32888}
32889
32890TEST(F32_IGEMM_MINMAX_1X4__SCALAR, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032891 for (size_t k = 1; k <= 5; k += 2) {
32892 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032893 GemmMicrokernelTester()
32894 .mr(1)
32895 .nr(4)
32896 .kr(1)
32897 .sr(1)
32898 .m(1)
32899 .n(4)
32900 .k(k)
32901 .ks(3)
32902 .a_offset(7)
32903 .zero_index(mz)
32904 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32905 }
32906 }
32907}
32908
32909TEST(F32_IGEMM_MINMAX_1X4__SCALAR, qmin) {
32910 GemmMicrokernelTester()
32911 .mr(1)
32912 .nr(4)
32913 .kr(1)
32914 .sr(1)
32915 .m(1)
32916 .n(4)
32917 .k(1)
32918 .qmin(128)
32919 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32920}
32921
32922TEST(F32_IGEMM_MINMAX_1X4__SCALAR, qmax) {
32923 GemmMicrokernelTester()
32924 .mr(1)
32925 .nr(4)
32926 .kr(1)
32927 .sr(1)
32928 .m(1)
32929 .n(4)
32930 .k(1)
32931 .qmax(128)
32932 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32933}
32934
32935TEST(F32_IGEMM_MINMAX_1X4__SCALAR, strided_cm) {
32936 GemmMicrokernelTester()
32937 .mr(1)
32938 .nr(4)
32939 .kr(1)
32940 .sr(1)
32941 .m(1)
32942 .n(4)
32943 .k(1)
32944 .cm_stride(7)
32945 .Test(xnn_f32_igemm_minmax_ukernel_1x4__scalar, xnn_init_f32_minmax_scalar_params);
32946}
32947
32948
32949TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_eq_1) {
32950 GemmMicrokernelTester()
32951 .mr(4)
32952 .nr(4)
32953 .kr(1)
32954 .sr(1)
32955 .m(4)
32956 .n(4)
32957 .k(1)
32958 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
32959}
32960
32961TEST(F32_IGEMM_MINMAX_4X4__SCALAR, strided_cn) {
32962 GemmMicrokernelTester()
32963 .mr(4)
32964 .nr(4)
32965 .kr(1)
32966 .sr(1)
32967 .m(4)
32968 .n(4)
32969 .k(1)
32970 .cn_stride(7)
32971 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
32972}
32973
32974TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032975 for (uint32_t n = 1; n <= 4; n++) {
32976 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080032977 GemmMicrokernelTester()
32978 .mr(4)
32979 .nr(4)
32980 .kr(1)
32981 .sr(1)
32982 .m(m)
32983 .n(n)
32984 .k(1)
32985 .iterations(1)
32986 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
32987 }
32988 }
32989}
32990
32991TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
32992 for (uint32_t m = 1; m <= 4; m++) {
32993 GemmMicrokernelTester()
32994 .mr(4)
32995 .nr(4)
32996 .kr(1)
32997 .sr(1)
32998 .m(m)
32999 .n(4)
33000 .k(1)
33001 .iterations(1)
33002 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33003 }
33004}
33005
33006TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
33007 for (uint32_t n = 1; n <= 4; n++) {
33008 GemmMicrokernelTester()
33009 .mr(4)
33010 .nr(4)
33011 .kr(1)
33012 .sr(1)
33013 .m(4)
33014 .n(n)
33015 .k(1)
33016 .iterations(1)
33017 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33018 }
33019}
33020
33021TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_gt_1) {
33022 for (size_t k = 2; k < 10; k++) {
33023 GemmMicrokernelTester()
33024 .mr(4)
33025 .nr(4)
33026 .kr(1)
33027 .sr(1)
33028 .m(4)
33029 .n(4)
33030 .k(k)
33031 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33032 }
33033}
33034
33035TEST(F32_IGEMM_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
33036 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033037 for (uint32_t n = 1; n <= 4; n++) {
33038 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033039 GemmMicrokernelTester()
33040 .mr(4)
33041 .nr(4)
33042 .kr(1)
33043 .sr(1)
33044 .m(m)
33045 .n(n)
33046 .k(k)
33047 .iterations(1)
33048 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33049 }
33050 }
33051 }
33052}
33053
33054TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_gt_4) {
33055 for (uint32_t n = 5; n < 8; n++) {
33056 for (size_t k = 1; k <= 5; k += 2) {
33057 GemmMicrokernelTester()
33058 .mr(4)
33059 .nr(4)
33060 .kr(1)
33061 .sr(1)
33062 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033063 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033064 .k(k)
33065 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33066 }
33067 }
33068}
33069
33070TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
33071 for (uint32_t n = 5; n < 8; n++) {
33072 for (size_t k = 1; k <= 5; k += 2) {
33073 GemmMicrokernelTester()
33074 .mr(4)
33075 .nr(4)
33076 .kr(1)
33077 .sr(1)
33078 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033079 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033080 .k(k)
33081 .cn_stride(7)
33082 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33083 }
33084 }
33085}
33086
33087TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
33088 for (uint32_t n = 5; n < 8; n++) {
33089 for (size_t k = 1; k <= 5; k += 2) {
33090 for (uint32_t m = 1; m <= 4; m++) {
33091 GemmMicrokernelTester()
33092 .mr(4)
33093 .nr(4)
33094 .kr(1)
33095 .sr(1)
33096 .m(m)
33097 .n(n)
33098 .k(k)
33099 .iterations(1)
33100 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33101 }
33102 }
33103 }
33104}
33105
33106TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_div_4) {
33107 for (uint32_t n = 8; n <= 12; n += 4) {
33108 for (size_t k = 1; k <= 5; k += 2) {
33109 GemmMicrokernelTester()
33110 .mr(4)
33111 .nr(4)
33112 .kr(1)
33113 .sr(1)
33114 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033115 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033116 .k(k)
33117 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33118 }
33119 }
33120}
33121
33122TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
33123 for (uint32_t n = 8; n <= 12; n += 4) {
33124 for (size_t k = 1; k <= 5; k += 2) {
33125 GemmMicrokernelTester()
33126 .mr(4)
33127 .nr(4)
33128 .kr(1)
33129 .sr(1)
33130 .m(4)
33131 .n(n)
33132 .k(k)
33133 .cn_stride(7)
33134 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33135 }
33136 }
33137}
33138
33139TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_div_4_subtile) {
33140 for (uint32_t n = 8; n <= 12; n += 4) {
33141 for (size_t k = 1; k <= 5; k += 2) {
33142 for (uint32_t m = 1; m <= 4; m++) {
33143 GemmMicrokernelTester()
33144 .mr(4)
33145 .nr(4)
33146 .kr(1)
33147 .sr(1)
33148 .m(m)
33149 .n(n)
33150 .k(k)
33151 .iterations(1)
33152 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33153 }
33154 }
33155 }
33156}
33157
33158TEST(F32_IGEMM_MINMAX_4X4__SCALAR, small_kernel) {
33159 for (size_t k = 1; k <= 5; k += 2) {
33160 GemmMicrokernelTester()
33161 .mr(4)
33162 .nr(4)
33163 .kr(1)
33164 .sr(1)
33165 .m(4)
33166 .n(4)
33167 .k(k)
33168 .ks(3)
33169 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33170 }
33171}
33172
33173TEST(F32_IGEMM_MINMAX_4X4__SCALAR, small_kernel_subtile) {
33174 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033175 for (uint32_t n = 1; n <= 4; n++) {
33176 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033177 GemmMicrokernelTester()
33178 .mr(4)
33179 .nr(4)
33180 .kr(1)
33181 .sr(1)
33182 .m(m)
33183 .n(n)
33184 .k(k)
33185 .ks(3)
33186 .iterations(1)
33187 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33188 }
33189 }
33190 }
33191}
33192
33193TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_gt_4_small_kernel) {
33194 for (uint32_t n = 5; n < 8; n++) {
33195 for (size_t k = 1; k <= 5; k += 2) {
33196 GemmMicrokernelTester()
33197 .mr(4)
33198 .nr(4)
33199 .kr(1)
33200 .sr(1)
33201 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033202 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033203 .k(k)
33204 .ks(3)
33205 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33206 }
33207 }
33208}
33209
33210TEST(F32_IGEMM_MINMAX_4X4__SCALAR, n_div_4_small_kernel) {
33211 for (uint32_t n = 8; n <= 12; n += 4) {
33212 for (size_t k = 1; k <= 5; k += 2) {
33213 GemmMicrokernelTester()
33214 .mr(4)
33215 .nr(4)
33216 .kr(1)
33217 .sr(1)
33218 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033219 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033220 .k(k)
33221 .ks(3)
33222 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33223 }
33224 }
33225}
33226
33227TEST(F32_IGEMM_MINMAX_4X4__SCALAR, strided_cm_subtile) {
33228 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033229 for (uint32_t n = 1; n <= 4; n++) {
33230 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033231 GemmMicrokernelTester()
33232 .mr(4)
33233 .nr(4)
33234 .kr(1)
33235 .sr(1)
33236 .m(m)
33237 .n(n)
33238 .k(k)
33239 .cm_stride(7)
33240 .iterations(1)
33241 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33242 }
33243 }
33244 }
33245}
33246
33247TEST(F32_IGEMM_MINMAX_4X4__SCALAR, a_offset) {
33248 for (size_t k = 1; k <= 5; k += 2) {
33249 GemmMicrokernelTester()
33250 .mr(4)
33251 .nr(4)
33252 .kr(1)
33253 .sr(1)
33254 .m(4)
33255 .n(4)
33256 .k(k)
33257 .ks(3)
33258 .a_offset(23)
33259 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33260 }
33261}
33262
33263TEST(F32_IGEMM_MINMAX_4X4__SCALAR, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033264 for (size_t k = 1; k <= 5; k += 2) {
33265 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033266 GemmMicrokernelTester()
33267 .mr(4)
33268 .nr(4)
33269 .kr(1)
33270 .sr(1)
33271 .m(4)
33272 .n(4)
33273 .k(k)
33274 .ks(3)
33275 .a_offset(23)
33276 .zero_index(mz)
33277 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33278 }
33279 }
33280}
33281
33282TEST(F32_IGEMM_MINMAX_4X4__SCALAR, qmin) {
33283 GemmMicrokernelTester()
33284 .mr(4)
33285 .nr(4)
33286 .kr(1)
33287 .sr(1)
33288 .m(4)
33289 .n(4)
33290 .k(1)
33291 .qmin(128)
33292 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33293}
33294
33295TEST(F32_IGEMM_MINMAX_4X4__SCALAR, qmax) {
33296 GemmMicrokernelTester()
33297 .mr(4)
33298 .nr(4)
33299 .kr(1)
33300 .sr(1)
33301 .m(4)
33302 .n(4)
33303 .k(1)
33304 .qmax(128)
33305 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33306}
33307
33308TEST(F32_IGEMM_MINMAX_4X4__SCALAR, strided_cm) {
33309 GemmMicrokernelTester()
33310 .mr(4)
33311 .nr(4)
33312 .kr(1)
33313 .sr(1)
33314 .m(4)
33315 .n(4)
33316 .k(1)
33317 .cm_stride(7)
33318 .Test(xnn_f32_igemm_minmax_ukernel_4x4__scalar, xnn_init_f32_minmax_scalar_params);
33319}
33320
33321
33322TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_eq_1) {
33323 GemmMicrokernelTester()
33324 .mr(4)
33325 .nr(2)
33326 .kr(1)
33327 .sr(1)
33328 .m(4)
33329 .n(2)
33330 .k(1)
33331 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33332}
33333
33334TEST(F32_IGEMM_MINMAX_4X2__SCALAR, strided_cn) {
33335 GemmMicrokernelTester()
33336 .mr(4)
33337 .nr(2)
33338 .kr(1)
33339 .sr(1)
33340 .m(4)
33341 .n(2)
33342 .k(1)
33343 .cn_stride(5)
33344 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33345}
33346
33347TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033348 for (uint32_t n = 1; n <= 2; n++) {
33349 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033350 GemmMicrokernelTester()
33351 .mr(4)
33352 .nr(2)
33353 .kr(1)
33354 .sr(1)
33355 .m(m)
33356 .n(n)
33357 .k(1)
33358 .iterations(1)
33359 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33360 }
33361 }
33362}
33363
33364TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_m) {
33365 for (uint32_t m = 1; m <= 4; m++) {
33366 GemmMicrokernelTester()
33367 .mr(4)
33368 .nr(2)
33369 .kr(1)
33370 .sr(1)
33371 .m(m)
33372 .n(2)
33373 .k(1)
33374 .iterations(1)
33375 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33376 }
33377}
33378
33379TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_n) {
33380 for (uint32_t n = 1; n <= 2; n++) {
33381 GemmMicrokernelTester()
33382 .mr(4)
33383 .nr(2)
33384 .kr(1)
33385 .sr(1)
33386 .m(4)
33387 .n(n)
33388 .k(1)
33389 .iterations(1)
33390 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33391 }
33392}
33393
33394TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_gt_1) {
33395 for (size_t k = 2; k < 10; k++) {
33396 GemmMicrokernelTester()
33397 .mr(4)
33398 .nr(2)
33399 .kr(1)
33400 .sr(1)
33401 .m(4)
33402 .n(2)
33403 .k(k)
33404 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33405 }
33406}
33407
33408TEST(F32_IGEMM_MINMAX_4X2__SCALAR, k_gt_1_subtile) {
33409 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033410 for (uint32_t n = 1; n <= 2; n++) {
33411 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033412 GemmMicrokernelTester()
33413 .mr(4)
33414 .nr(2)
33415 .kr(1)
33416 .sr(1)
33417 .m(m)
33418 .n(n)
33419 .k(k)
33420 .iterations(1)
33421 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33422 }
33423 }
33424 }
33425}
33426
33427TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_gt_2) {
33428 for (uint32_t n = 3; n < 4; n++) {
33429 for (size_t k = 1; k <= 5; k += 2) {
33430 GemmMicrokernelTester()
33431 .mr(4)
33432 .nr(2)
33433 .kr(1)
33434 .sr(1)
33435 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033436 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033437 .k(k)
33438 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33439 }
33440 }
33441}
33442
33443TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_gt_2_strided_cn) {
33444 for (uint32_t n = 3; n < 4; n++) {
33445 for (size_t k = 1; k <= 5; k += 2) {
33446 GemmMicrokernelTester()
33447 .mr(4)
33448 .nr(2)
33449 .kr(1)
33450 .sr(1)
33451 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033452 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033453 .k(k)
33454 .cn_stride(5)
33455 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33456 }
33457 }
33458}
33459
33460TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_gt_2_subtile) {
33461 for (uint32_t n = 3; n < 4; n++) {
33462 for (size_t k = 1; k <= 5; k += 2) {
33463 for (uint32_t m = 1; m <= 4; m++) {
33464 GemmMicrokernelTester()
33465 .mr(4)
33466 .nr(2)
33467 .kr(1)
33468 .sr(1)
33469 .m(m)
33470 .n(n)
33471 .k(k)
33472 .iterations(1)
33473 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33474 }
33475 }
33476 }
33477}
33478
33479TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_div_2) {
33480 for (uint32_t n = 4; n <= 6; n += 2) {
33481 for (size_t k = 1; k <= 5; k += 2) {
33482 GemmMicrokernelTester()
33483 .mr(4)
33484 .nr(2)
33485 .kr(1)
33486 .sr(1)
33487 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033488 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033489 .k(k)
33490 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33491 }
33492 }
33493}
33494
33495TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_div_2_strided_cn) {
33496 for (uint32_t n = 4; n <= 6; n += 2) {
33497 for (size_t k = 1; k <= 5; k += 2) {
33498 GemmMicrokernelTester()
33499 .mr(4)
33500 .nr(2)
33501 .kr(1)
33502 .sr(1)
33503 .m(4)
33504 .n(n)
33505 .k(k)
33506 .cn_stride(5)
33507 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33508 }
33509 }
33510}
33511
33512TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_div_2_subtile) {
33513 for (uint32_t n = 4; n <= 6; n += 2) {
33514 for (size_t k = 1; k <= 5; k += 2) {
33515 for (uint32_t m = 1; m <= 4; m++) {
33516 GemmMicrokernelTester()
33517 .mr(4)
33518 .nr(2)
33519 .kr(1)
33520 .sr(1)
33521 .m(m)
33522 .n(n)
33523 .k(k)
33524 .iterations(1)
33525 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33526 }
33527 }
33528 }
33529}
33530
33531TEST(F32_IGEMM_MINMAX_4X2__SCALAR, small_kernel) {
33532 for (size_t k = 1; k <= 5; k += 2) {
33533 GemmMicrokernelTester()
33534 .mr(4)
33535 .nr(2)
33536 .kr(1)
33537 .sr(1)
33538 .m(4)
33539 .n(2)
33540 .k(k)
33541 .ks(3)
33542 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33543 }
33544}
33545
33546TEST(F32_IGEMM_MINMAX_4X2__SCALAR, small_kernel_subtile) {
33547 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033548 for (uint32_t n = 1; n <= 2; n++) {
33549 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033550 GemmMicrokernelTester()
33551 .mr(4)
33552 .nr(2)
33553 .kr(1)
33554 .sr(1)
33555 .m(m)
33556 .n(n)
33557 .k(k)
33558 .ks(3)
33559 .iterations(1)
33560 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33561 }
33562 }
33563 }
33564}
33565
33566TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_gt_2_small_kernel) {
33567 for (uint32_t n = 3; n < 4; n++) {
33568 for (size_t k = 1; k <= 5; k += 2) {
33569 GemmMicrokernelTester()
33570 .mr(4)
33571 .nr(2)
33572 .kr(1)
33573 .sr(1)
33574 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033575 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033576 .k(k)
33577 .ks(3)
33578 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33579 }
33580 }
33581}
33582
33583TEST(F32_IGEMM_MINMAX_4X2__SCALAR, n_div_2_small_kernel) {
33584 for (uint32_t n = 4; n <= 6; n += 2) {
33585 for (size_t k = 1; k <= 5; k += 2) {
33586 GemmMicrokernelTester()
33587 .mr(4)
33588 .nr(2)
33589 .kr(1)
33590 .sr(1)
33591 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033592 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033593 .k(k)
33594 .ks(3)
33595 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33596 }
33597 }
33598}
33599
33600TEST(F32_IGEMM_MINMAX_4X2__SCALAR, strided_cm_subtile) {
33601 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033602 for (uint32_t n = 1; n <= 2; n++) {
33603 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033604 GemmMicrokernelTester()
33605 .mr(4)
33606 .nr(2)
33607 .kr(1)
33608 .sr(1)
33609 .m(m)
33610 .n(n)
33611 .k(k)
33612 .cm_stride(5)
33613 .iterations(1)
33614 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33615 }
33616 }
33617 }
33618}
33619
33620TEST(F32_IGEMM_MINMAX_4X2__SCALAR, a_offset) {
33621 for (size_t k = 1; k <= 5; k += 2) {
33622 GemmMicrokernelTester()
33623 .mr(4)
33624 .nr(2)
33625 .kr(1)
33626 .sr(1)
33627 .m(4)
33628 .n(2)
33629 .k(k)
33630 .ks(3)
33631 .a_offset(23)
33632 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33633 }
33634}
33635
33636TEST(F32_IGEMM_MINMAX_4X2__SCALAR, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033637 for (size_t k = 1; k <= 5; k += 2) {
33638 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033639 GemmMicrokernelTester()
33640 .mr(4)
33641 .nr(2)
33642 .kr(1)
33643 .sr(1)
33644 .m(4)
33645 .n(2)
33646 .k(k)
33647 .ks(3)
33648 .a_offset(23)
33649 .zero_index(mz)
33650 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33651 }
33652 }
33653}
33654
33655TEST(F32_IGEMM_MINMAX_4X2__SCALAR, qmin) {
33656 GemmMicrokernelTester()
33657 .mr(4)
33658 .nr(2)
33659 .kr(1)
33660 .sr(1)
33661 .m(4)
33662 .n(2)
33663 .k(1)
33664 .qmin(128)
33665 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33666}
33667
33668TEST(F32_IGEMM_MINMAX_4X2__SCALAR, qmax) {
33669 GemmMicrokernelTester()
33670 .mr(4)
33671 .nr(2)
33672 .kr(1)
33673 .sr(1)
33674 .m(4)
33675 .n(2)
33676 .k(1)
33677 .qmax(128)
33678 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33679}
33680
33681TEST(F32_IGEMM_MINMAX_4X2__SCALAR, strided_cm) {
33682 GemmMicrokernelTester()
33683 .mr(4)
33684 .nr(2)
33685 .kr(1)
33686 .sr(1)
33687 .m(4)
33688 .n(2)
33689 .k(1)
33690 .cm_stride(5)
33691 .Test(xnn_f32_igemm_minmax_ukernel_4x2__scalar, xnn_init_f32_minmax_scalar_params);
33692}
33693
33694
33695#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
33696 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4) {
33697 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033698 GemmMicrokernelTester()
33699 .mr(4)
33700 .nr(8)
33701 .kr(1)
33702 .sr(1)
33703 .m(4)
33704 .n(8)
33705 .k(4)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033706 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033707 }
33708
33709 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cn) {
33710 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033711 GemmMicrokernelTester()
33712 .mr(4)
33713 .nr(8)
33714 .kr(1)
33715 .sr(1)
33716 .m(4)
33717 .n(8)
33718 .k(4)
33719 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033720 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033721 }
33722
33723 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile) {
33724 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080033725 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033726 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033727 GemmMicrokernelTester()
33728 .mr(4)
33729 .nr(8)
33730 .kr(1)
33731 .sr(1)
33732 .m(m)
33733 .n(n)
33734 .k(4)
33735 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033736 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033737 }
33738 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033739 }
33740
33741 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile_m) {
33742 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033743 for (uint32_t m = 1; m <= 4; m++) {
33744 GemmMicrokernelTester()
33745 .mr(4)
33746 .nr(8)
33747 .kr(1)
33748 .sr(1)
33749 .m(m)
33750 .n(8)
33751 .k(4)
33752 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033753 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033754 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033755 }
33756
33757 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_4_subtile_n) {
33758 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033759 for (uint32_t n = 1; n <= 8; n++) {
33760 GemmMicrokernelTester()
33761 .mr(4)
33762 .nr(8)
33763 .kr(1)
33764 .sr(1)
33765 .m(4)
33766 .n(n)
33767 .k(4)
33768 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033769 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033770 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033771 }
33772
33773 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_8) {
33774 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033775 GemmMicrokernelTester()
33776 .mr(4)
33777 .nr(8)
33778 .kr(1)
33779 .sr(1)
33780 .m(4)
33781 .n(8)
33782 .k(8)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033783 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033784 }
33785
33786 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_eq_8_subtile) {
33787 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080033788 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033789 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033790 GemmMicrokernelTester()
33791 .mr(4)
33792 .nr(8)
33793 .kr(1)
33794 .sr(1)
33795 .m(m)
33796 .n(n)
33797 .k(8)
33798 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033799 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033800 }
33801 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033802 }
33803
33804 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_lt_8) {
33805 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033806 for (size_t k = 1; k < 8; k++) {
33807 GemmMicrokernelTester()
33808 .mr(4)
33809 .nr(8)
33810 .kr(1)
33811 .sr(1)
33812 .m(4)
33813 .n(8)
33814 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033815 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033816 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033817 }
33818
33819 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_lt_8_subtile) {
33820 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033821 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033822 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033823 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033824 GemmMicrokernelTester()
33825 .mr(4)
33826 .nr(8)
33827 .kr(1)
33828 .sr(1)
33829 .m(m)
33830 .n(n)
33831 .k(k)
33832 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033833 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033834 }
33835 }
33836 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033837 }
33838
33839 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_gt_8) {
33840 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033841 for (size_t k = 9; k < 16; k++) {
33842 GemmMicrokernelTester()
33843 .mr(4)
33844 .nr(8)
33845 .kr(1)
33846 .sr(1)
33847 .m(4)
33848 .n(8)
33849 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033850 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033851 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033852 }
33853
33854 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_gt_8_subtile) {
33855 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033856 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033857 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033858 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033859 GemmMicrokernelTester()
33860 .mr(4)
33861 .nr(8)
33862 .kr(1)
33863 .sr(1)
33864 .m(m)
33865 .n(n)
33866 .k(k)
33867 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033868 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033869 }
33870 }
33871 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033872 }
33873
33874 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_div_4) {
33875 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033876 for (size_t k = 12; k <= 40; k += 4) {
33877 GemmMicrokernelTester()
33878 .mr(4)
33879 .nr(8)
33880 .kr(1)
33881 .sr(1)
33882 .m(4)
33883 .n(8)
33884 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033885 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033886 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033887 }
33888
33889 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, k_div_4_subtile) {
33890 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033891 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033892 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033893 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033894 GemmMicrokernelTester()
33895 .mr(4)
33896 .nr(8)
33897 .kr(1)
33898 .sr(1)
33899 .m(m)
33900 .n(n)
33901 .k(k)
33902 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033903 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033904 }
33905 }
33906 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033907 }
33908
33909 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8) {
33910 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033911 for (uint32_t n = 9; n < 16; n++) {
33912 for (size_t k = 1; k <= 20; k += 5) {
33913 GemmMicrokernelTester()
33914 .mr(4)
33915 .nr(8)
33916 .kr(1)
33917 .sr(1)
33918 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033919 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033920 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033921 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033922 }
33923 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033924 }
33925
33926 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
33927 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033928 for (uint32_t n = 9; n < 16; n++) {
33929 for (size_t k = 1; k <= 20; k += 5) {
33930 GemmMicrokernelTester()
33931 .mr(4)
33932 .nr(8)
33933 .kr(1)
33934 .sr(1)
33935 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033936 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033937 .k(k)
33938 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033939 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033940 }
33941 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033942 }
33943
33944 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_subtile) {
33945 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033946 for (uint32_t n = 9; n < 16; n++) {
33947 for (size_t k = 1; k <= 20; k += 5) {
33948 for (uint32_t m = 1; m <= 4; m++) {
33949 GemmMicrokernelTester()
33950 .mr(4)
33951 .nr(8)
33952 .kr(1)
33953 .sr(1)
33954 .m(m)
33955 .n(n)
33956 .k(k)
33957 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033958 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033959 }
33960 }
33961 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033962 }
33963
33964 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8) {
33965 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033966 for (uint32_t n = 16; n <= 24; n += 8) {
33967 for (size_t k = 1; k <= 20; k += 5) {
33968 GemmMicrokernelTester()
33969 .mr(4)
33970 .nr(8)
33971 .kr(1)
33972 .sr(1)
33973 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033974 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033975 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033976 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033977 }
33978 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033979 }
33980
33981 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_strided_cn) {
33982 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033983 for (uint32_t n = 16; n <= 24; n += 8) {
33984 for (size_t k = 1; k <= 20; k += 5) {
33985 GemmMicrokernelTester()
33986 .mr(4)
33987 .nr(8)
33988 .kr(1)
33989 .sr(1)
33990 .m(4)
33991 .n(n)
33992 .k(k)
33993 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080033994 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033995 }
33996 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080033997 }
33998
33999 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_subtile) {
34000 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034001 for (uint32_t n = 16; n <= 24; n += 8) {
34002 for (size_t k = 1; k <= 20; k += 5) {
34003 for (uint32_t m = 1; m <= 4; m++) {
34004 GemmMicrokernelTester()
34005 .mr(4)
34006 .nr(8)
34007 .kr(1)
34008 .sr(1)
34009 .m(m)
34010 .n(n)
34011 .k(k)
34012 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034013 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034014 }
34015 }
34016 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034017 }
34018
34019 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, small_kernel) {
34020 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034021 for (size_t k = 1; k <= 20; k += 5) {
34022 GemmMicrokernelTester()
34023 .mr(4)
34024 .nr(8)
34025 .kr(1)
34026 .sr(1)
34027 .m(4)
34028 .n(8)
34029 .k(k)
34030 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034031 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034032 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034033 }
34034
34035 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, small_kernel_subtile) {
34036 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034037 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034038 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034039 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034040 GemmMicrokernelTester()
34041 .mr(4)
34042 .nr(8)
34043 .kr(1)
34044 .sr(1)
34045 .m(m)
34046 .n(n)
34047 .k(k)
34048 .ks(3)
34049 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034050 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034051 }
34052 }
34053 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034054 }
34055
34056 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
34057 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034058 for (uint32_t n = 9; n < 16; n++) {
34059 for (size_t k = 1; k <= 20; k += 5) {
34060 GemmMicrokernelTester()
34061 .mr(4)
34062 .nr(8)
34063 .kr(1)
34064 .sr(1)
34065 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034066 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034067 .k(k)
34068 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034069 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034070 }
34071 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034072 }
34073
34074 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, n_div_8_small_kernel) {
34075 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034076 for (uint32_t n = 16; n <= 24; n += 8) {
34077 for (size_t k = 1; k <= 20; k += 5) {
34078 GemmMicrokernelTester()
34079 .mr(4)
34080 .nr(8)
34081 .kr(1)
34082 .sr(1)
34083 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034084 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034085 .k(k)
34086 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034087 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034088 }
34089 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034090 }
34091
34092 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cm_subtile) {
34093 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034094 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034095 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034096 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034097 GemmMicrokernelTester()
34098 .mr(4)
34099 .nr(8)
34100 .kr(1)
34101 .sr(1)
34102 .m(m)
34103 .n(n)
34104 .k(k)
34105 .cm_stride(11)
34106 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034107 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034108 }
34109 }
34110 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034111 }
34112
34113 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, a_offset) {
34114 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034115 for (size_t k = 1; k <= 20; k += 5) {
34116 GemmMicrokernelTester()
34117 .mr(4)
34118 .nr(8)
34119 .kr(1)
34120 .sr(1)
34121 .m(4)
34122 .n(8)
34123 .k(k)
34124 .ks(3)
34125 .a_offset(83)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034126 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034127 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034128 }
34129
34130 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, zero) {
34131 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080034132 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034133 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034134 GemmMicrokernelTester()
34135 .mr(4)
34136 .nr(8)
34137 .kr(1)
34138 .sr(1)
34139 .m(4)
34140 .n(8)
34141 .k(k)
34142 .ks(3)
34143 .a_offset(83)
34144 .zero_index(mz)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034145 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034146 }
34147 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034148 }
34149
34150 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, qmin) {
34151 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034152 GemmMicrokernelTester()
34153 .mr(4)
34154 .nr(8)
34155 .kr(1)
34156 .sr(1)
34157 .m(4)
34158 .n(8)
34159 .k(4)
34160 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034161 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034162 }
34163
34164 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, qmax) {
34165 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034166 GemmMicrokernelTester()
34167 .mr(4)
34168 .nr(8)
34169 .kr(1)
34170 .sr(1)
34171 .m(4)
34172 .n(8)
34173 .k(4)
34174 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034175 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034176 }
34177
34178 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_PRFM_CORTEX_A75, strided_cm) {
34179 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034180 GemmMicrokernelTester()
34181 .mr(4)
34182 .nr(8)
34183 .kr(1)
34184 .sr(1)
34185 .m(4)
34186 .n(8)
34187 .k(4)
34188 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034189 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034190 }
34191#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
34192
34193
34194#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
34195 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4) {
34196 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034197 GemmMicrokernelTester()
34198 .mr(4)
34199 .nr(8)
34200 .kr(1)
34201 .sr(1)
34202 .m(4)
34203 .n(8)
34204 .k(4)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034205 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034206 }
34207
34208 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, strided_cn) {
34209 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034210 GemmMicrokernelTester()
34211 .mr(4)
34212 .nr(8)
34213 .kr(1)
34214 .sr(1)
34215 .m(4)
34216 .n(8)
34217 .k(4)
34218 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034219 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034220 }
34221
34222 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile) {
34223 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080034224 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034225 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034226 GemmMicrokernelTester()
34227 .mr(4)
34228 .nr(8)
34229 .kr(1)
34230 .sr(1)
34231 .m(m)
34232 .n(n)
34233 .k(4)
34234 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034235 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034236 }
34237 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034238 }
34239
34240 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile_m) {
34241 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034242 for (uint32_t m = 1; m <= 4; m++) {
34243 GemmMicrokernelTester()
34244 .mr(4)
34245 .nr(8)
34246 .kr(1)
34247 .sr(1)
34248 .m(m)
34249 .n(8)
34250 .k(4)
34251 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034252 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034253 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034254 }
34255
34256 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_4_subtile_n) {
34257 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034258 for (uint32_t n = 1; n <= 8; n++) {
34259 GemmMicrokernelTester()
34260 .mr(4)
34261 .nr(8)
34262 .kr(1)
34263 .sr(1)
34264 .m(4)
34265 .n(n)
34266 .k(4)
34267 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034268 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034269 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034270 }
34271
34272 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_8) {
34273 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034274 GemmMicrokernelTester()
34275 .mr(4)
34276 .nr(8)
34277 .kr(1)
34278 .sr(1)
34279 .m(4)
34280 .n(8)
34281 .k(8)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034282 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034283 }
34284
34285 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_eq_8_subtile) {
34286 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080034287 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034288 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034289 GemmMicrokernelTester()
34290 .mr(4)
34291 .nr(8)
34292 .kr(1)
34293 .sr(1)
34294 .m(m)
34295 .n(n)
34296 .k(8)
34297 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034298 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034299 }
34300 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034301 }
34302
34303 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_lt_8) {
34304 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034305 for (size_t k = 1; k < 8; k++) {
34306 GemmMicrokernelTester()
34307 .mr(4)
34308 .nr(8)
34309 .kr(1)
34310 .sr(1)
34311 .m(4)
34312 .n(8)
34313 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034314 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034315 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034316 }
34317
34318 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_lt_8_subtile) {
34319 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034320 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034321 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034322 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034323 GemmMicrokernelTester()
34324 .mr(4)
34325 .nr(8)
34326 .kr(1)
34327 .sr(1)
34328 .m(m)
34329 .n(n)
34330 .k(k)
34331 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034332 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034333 }
34334 }
34335 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034336 }
34337
34338 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_gt_8) {
34339 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034340 for (size_t k = 9; k < 16; k++) {
34341 GemmMicrokernelTester()
34342 .mr(4)
34343 .nr(8)
34344 .kr(1)
34345 .sr(1)
34346 .m(4)
34347 .n(8)
34348 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034349 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034350 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034351 }
34352
34353 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_gt_8_subtile) {
34354 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034355 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034356 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034357 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034358 GemmMicrokernelTester()
34359 .mr(4)
34360 .nr(8)
34361 .kr(1)
34362 .sr(1)
34363 .m(m)
34364 .n(n)
34365 .k(k)
34366 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034367 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034368 }
34369 }
34370 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034371 }
34372
34373 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_div_4) {
34374 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034375 for (size_t k = 12; k <= 40; k += 4) {
34376 GemmMicrokernelTester()
34377 .mr(4)
34378 .nr(8)
34379 .kr(1)
34380 .sr(1)
34381 .m(4)
34382 .n(8)
34383 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034384 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034385 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034386 }
34387
34388 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, k_div_4_subtile) {
34389 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034390 for (size_t k = 12; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034391 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034392 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034393 GemmMicrokernelTester()
34394 .mr(4)
34395 .nr(8)
34396 .kr(1)
34397 .sr(1)
34398 .m(m)
34399 .n(n)
34400 .k(k)
34401 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034402 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034403 }
34404 }
34405 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034406 }
34407
34408 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8) {
34409 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034410 for (uint32_t n = 9; n < 16; n++) {
34411 for (size_t k = 1; k <= 20; k += 5) {
34412 GemmMicrokernelTester()
34413 .mr(4)
34414 .nr(8)
34415 .kr(1)
34416 .sr(1)
34417 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034418 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034419 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034420 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034421 }
34422 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034423 }
34424
34425 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_strided_cn) {
34426 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034427 for (uint32_t n = 9; n < 16; n++) {
34428 for (size_t k = 1; k <= 20; k += 5) {
34429 GemmMicrokernelTester()
34430 .mr(4)
34431 .nr(8)
34432 .kr(1)
34433 .sr(1)
34434 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034435 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034436 .k(k)
34437 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034438 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034439 }
34440 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034441 }
34442
34443 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_subtile) {
34444 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034445 for (uint32_t n = 9; n < 16; n++) {
34446 for (size_t k = 1; k <= 20; k += 5) {
34447 for (uint32_t m = 1; m <= 4; m++) {
34448 GemmMicrokernelTester()
34449 .mr(4)
34450 .nr(8)
34451 .kr(1)
34452 .sr(1)
34453 .m(m)
34454 .n(n)
34455 .k(k)
34456 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034457 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034458 }
34459 }
34460 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034461 }
34462
34463 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_div_8) {
34464 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034465 for (uint32_t n = 16; n <= 24; n += 8) {
34466 for (size_t k = 1; k <= 20; k += 5) {
34467 GemmMicrokernelTester()
34468 .mr(4)
34469 .nr(8)
34470 .kr(1)
34471 .sr(1)
34472 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034473 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034474 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034475 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034476 }
34477 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034478 }
34479
34480 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_strided_cn) {
34481 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034482 for (uint32_t n = 16; n <= 24; n += 8) {
34483 for (size_t k = 1; k <= 20; k += 5) {
34484 GemmMicrokernelTester()
34485 .mr(4)
34486 .nr(8)
34487 .kr(1)
34488 .sr(1)
34489 .m(4)
34490 .n(n)
34491 .k(k)
34492 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034493 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034494 }
34495 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034496 }
34497
34498 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_subtile) {
34499 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034500 for (uint32_t n = 16; n <= 24; n += 8) {
34501 for (size_t k = 1; k <= 20; k += 5) {
34502 for (uint32_t m = 1; m <= 4; m++) {
34503 GemmMicrokernelTester()
34504 .mr(4)
34505 .nr(8)
34506 .kr(1)
34507 .sr(1)
34508 .m(m)
34509 .n(n)
34510 .k(k)
34511 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034512 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034513 }
34514 }
34515 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034516 }
34517
34518 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, small_kernel) {
34519 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034520 for (size_t k = 1; k <= 20; k += 5) {
34521 GemmMicrokernelTester()
34522 .mr(4)
34523 .nr(8)
34524 .kr(1)
34525 .sr(1)
34526 .m(4)
34527 .n(8)
34528 .k(k)
34529 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034530 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034531 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034532 }
34533
34534 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, small_kernel_subtile) {
34535 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034536 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034537 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034538 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034539 GemmMicrokernelTester()
34540 .mr(4)
34541 .nr(8)
34542 .kr(1)
34543 .sr(1)
34544 .m(m)
34545 .n(n)
34546 .k(k)
34547 .ks(3)
34548 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034549 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034550 }
34551 }
34552 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034553 }
34554
34555 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_gt_8_small_kernel) {
34556 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034557 for (uint32_t n = 9; n < 16; n++) {
34558 for (size_t k = 1; k <= 20; k += 5) {
34559 GemmMicrokernelTester()
34560 .mr(4)
34561 .nr(8)
34562 .kr(1)
34563 .sr(1)
34564 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034565 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034566 .k(k)
34567 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034568 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034569 }
34570 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034571 }
34572
34573 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, n_div_8_small_kernel) {
34574 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034575 for (uint32_t n = 16; n <= 24; n += 8) {
34576 for (size_t k = 1; k <= 20; k += 5) {
34577 GemmMicrokernelTester()
34578 .mr(4)
34579 .nr(8)
34580 .kr(1)
34581 .sr(1)
34582 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034583 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034584 .k(k)
34585 .ks(3)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034586 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034587 }
34588 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034589 }
34590
34591 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, strided_cm_subtile) {
34592 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034593 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034594 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034595 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034596 GemmMicrokernelTester()
34597 .mr(4)
34598 .nr(8)
34599 .kr(1)
34600 .sr(1)
34601 .m(m)
34602 .n(n)
34603 .k(k)
34604 .cm_stride(11)
34605 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034606 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034607 }
34608 }
34609 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034610 }
34611
34612 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, a_offset) {
34613 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034614 for (size_t k = 1; k <= 20; k += 5) {
34615 GemmMicrokernelTester()
34616 .mr(4)
34617 .nr(8)
34618 .kr(1)
34619 .sr(1)
34620 .m(4)
34621 .n(8)
34622 .k(k)
34623 .ks(3)
34624 .a_offset(83)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034625 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034626 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034627 }
34628
34629 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, zero) {
34630 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080034631 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034632 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034633 GemmMicrokernelTester()
34634 .mr(4)
34635 .nr(8)
34636 .kr(1)
34637 .sr(1)
34638 .m(4)
34639 .n(8)
34640 .k(k)
34641 .ks(3)
34642 .a_offset(83)
34643 .zero_index(mz)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034644 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034645 }
34646 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034647 }
34648
34649 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, qmin) {
34650 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034651 GemmMicrokernelTester()
34652 .mr(4)
34653 .nr(8)
34654 .kr(1)
34655 .sr(1)
34656 .m(4)
34657 .n(8)
34658 .k(4)
34659 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034660 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034661 }
34662
34663 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, qmax) {
34664 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034665 GemmMicrokernelTester()
34666 .mr(4)
34667 .nr(8)
34668 .kr(1)
34669 .sr(1)
34670 .m(4)
34671 .n(8)
34672 .k(4)
34673 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034674 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034675 }
34676
34677 TEST(GENERATE_F32_IGEMM_4X8__AARCH32_NEON_CORTEX_A53, strided_cm) {
34678 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034679 GemmMicrokernelTester()
34680 .mr(4)
34681 .nr(8)
34682 .kr(1)
34683 .sr(1)
34684 .m(4)
34685 .n(8)
34686 .k(4)
34687 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080034688 .Test(xnn_generate_f32_igemm_ukernel_4x8__aarch32_neon_cortex_a53, xnn_init_f32_minmax_scalar_params);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080034689 }
34690#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
Zhi An Ngf30a8592022-02-03 16:49:19 -080034691
34692
34693#if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
34694 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8) {
34695 TEST_REQUIRES_ARM_NEON_FMA;
34696 GemmMicrokernelTester()
34697 .mr(1)
34698 .nr(8)
34699 .kr(1)
34700 .sr(1)
34701 .m(1)
34702 .n(8)
34703 .k(8)
34704 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34705 }
34706
34707 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cn) {
34708 TEST_REQUIRES_ARM_NEON_FMA;
34709 GemmMicrokernelTester()
34710 .mr(1)
34711 .nr(8)
34712 .kr(1)
34713 .sr(1)
34714 .m(1)
34715 .n(8)
34716 .k(8)
34717 .cn_stride(11)
34718 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34719 }
34720
34721 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile) {
34722 TEST_REQUIRES_ARM_NEON_FMA;
34723 for (uint32_t n = 1; n <= 8; n++) {
34724 for (uint32_t m = 1; m <= 1; m++) {
34725 GemmMicrokernelTester()
34726 .mr(1)
34727 .nr(8)
34728 .kr(1)
34729 .sr(1)
34730 .m(m)
34731 .n(n)
34732 .k(8)
34733 .iterations(1)
34734 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34735 }
34736 }
34737 }
34738
34739 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_m) {
34740 TEST_REQUIRES_ARM_NEON_FMA;
34741 for (uint32_t m = 1; m <= 1; m++) {
34742 GemmMicrokernelTester()
34743 .mr(1)
34744 .nr(8)
34745 .kr(1)
34746 .sr(1)
34747 .m(m)
34748 .n(8)
34749 .k(8)
34750 .iterations(1)
34751 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34752 }
34753 }
34754
34755 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_8_subtile_n) {
34756 TEST_REQUIRES_ARM_NEON_FMA;
34757 for (uint32_t n = 1; n <= 8; n++) {
34758 GemmMicrokernelTester()
34759 .mr(1)
34760 .nr(8)
34761 .kr(1)
34762 .sr(1)
34763 .m(1)
34764 .n(n)
34765 .k(8)
34766 .iterations(1)
34767 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34768 }
34769 }
34770
34771 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16) {
34772 TEST_REQUIRES_ARM_NEON_FMA;
34773 GemmMicrokernelTester()
34774 .mr(1)
34775 .nr(8)
34776 .kr(1)
34777 .sr(1)
34778 .m(1)
34779 .n(8)
34780 .k(16)
34781 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34782 }
34783
34784 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_eq_16_subtile) {
34785 TEST_REQUIRES_ARM_NEON_FMA;
34786 for (uint32_t n = 1; n <= 8; n++) {
34787 for (uint32_t m = 1; m <= 1; m++) {
34788 GemmMicrokernelTester()
34789 .mr(1)
34790 .nr(8)
34791 .kr(1)
34792 .sr(1)
34793 .m(m)
34794 .n(n)
34795 .k(16)
34796 .iterations(1)
34797 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34798 }
34799 }
34800 }
34801
34802 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16) {
34803 TEST_REQUIRES_ARM_NEON_FMA;
34804 for (size_t k = 1; k < 16; k++) {
34805 GemmMicrokernelTester()
34806 .mr(1)
34807 .nr(8)
34808 .kr(1)
34809 .sr(1)
34810 .m(1)
34811 .n(8)
34812 .k(k)
34813 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34814 }
34815 }
34816
34817 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_lt_16_subtile) {
34818 TEST_REQUIRES_ARM_NEON_FMA;
34819 for (size_t k = 1; k < 16; k++) {
34820 for (uint32_t n = 1; n <= 8; n++) {
34821 for (uint32_t m = 1; m <= 1; m++) {
34822 GemmMicrokernelTester()
34823 .mr(1)
34824 .nr(8)
34825 .kr(1)
34826 .sr(1)
34827 .m(m)
34828 .n(n)
34829 .k(k)
34830 .iterations(1)
34831 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34832 }
34833 }
34834 }
34835 }
34836
34837 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16) {
34838 TEST_REQUIRES_ARM_NEON_FMA;
34839 for (size_t k = 17; k < 32; k++) {
34840 GemmMicrokernelTester()
34841 .mr(1)
34842 .nr(8)
34843 .kr(1)
34844 .sr(1)
34845 .m(1)
34846 .n(8)
34847 .k(k)
34848 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34849 }
34850 }
34851
34852 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_gt_16_subtile) {
34853 TEST_REQUIRES_ARM_NEON_FMA;
34854 for (size_t k = 17; k < 32; k++) {
34855 for (uint32_t n = 1; n <= 8; n++) {
34856 for (uint32_t m = 1; m <= 1; m++) {
34857 GemmMicrokernelTester()
34858 .mr(1)
34859 .nr(8)
34860 .kr(1)
34861 .sr(1)
34862 .m(m)
34863 .n(n)
34864 .k(k)
34865 .iterations(1)
34866 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34867 }
34868 }
34869 }
34870 }
34871
34872 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8) {
34873 TEST_REQUIRES_ARM_NEON_FMA;
34874 for (size_t k = 24; k <= 80; k += 8) {
34875 GemmMicrokernelTester()
34876 .mr(1)
34877 .nr(8)
34878 .kr(1)
34879 .sr(1)
34880 .m(1)
34881 .n(8)
34882 .k(k)
34883 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34884 }
34885 }
34886
34887 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, k_div_8_subtile) {
34888 TEST_REQUIRES_ARM_NEON_FMA;
34889 for (size_t k = 24; k <= 80; k += 8) {
34890 for (uint32_t n = 1; n <= 8; n++) {
34891 for (uint32_t m = 1; m <= 1; m++) {
34892 GemmMicrokernelTester()
34893 .mr(1)
34894 .nr(8)
34895 .kr(1)
34896 .sr(1)
34897 .m(m)
34898 .n(n)
34899 .k(k)
34900 .iterations(1)
34901 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34902 }
34903 }
34904 }
34905 }
34906
34907 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8) {
34908 TEST_REQUIRES_ARM_NEON_FMA;
34909 for (uint32_t n = 9; n < 16; n++) {
34910 for (size_t k = 1; k <= 40; k += 9) {
34911 GemmMicrokernelTester()
34912 .mr(1)
34913 .nr(8)
34914 .kr(1)
34915 .sr(1)
34916 .m(1)
34917 .n(n)
34918 .k(k)
34919 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34920 }
34921 }
34922 }
34923
34924 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_strided_cn) {
34925 TEST_REQUIRES_ARM_NEON_FMA;
34926 for (uint32_t n = 9; n < 16; n++) {
34927 for (size_t k = 1; k <= 40; k += 9) {
34928 GemmMicrokernelTester()
34929 .mr(1)
34930 .nr(8)
34931 .kr(1)
34932 .sr(1)
34933 .m(1)
34934 .n(n)
34935 .k(k)
34936 .cn_stride(11)
34937 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34938 }
34939 }
34940 }
34941
34942 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_subtile) {
34943 TEST_REQUIRES_ARM_NEON_FMA;
34944 for (uint32_t n = 9; n < 16; n++) {
34945 for (size_t k = 1; k <= 40; k += 9) {
34946 for (uint32_t m = 1; m <= 1; m++) {
34947 GemmMicrokernelTester()
34948 .mr(1)
34949 .nr(8)
34950 .kr(1)
34951 .sr(1)
34952 .m(m)
34953 .n(n)
34954 .k(k)
34955 .iterations(1)
34956 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34957 }
34958 }
34959 }
34960 }
34961
34962 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8) {
34963 TEST_REQUIRES_ARM_NEON_FMA;
34964 for (uint32_t n = 16; n <= 24; n += 8) {
34965 for (size_t k = 1; k <= 40; k += 9) {
34966 GemmMicrokernelTester()
34967 .mr(1)
34968 .nr(8)
34969 .kr(1)
34970 .sr(1)
34971 .m(1)
34972 .n(n)
34973 .k(k)
34974 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34975 }
34976 }
34977 }
34978
34979 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_strided_cn) {
34980 TEST_REQUIRES_ARM_NEON_FMA;
34981 for (uint32_t n = 16; n <= 24; n += 8) {
34982 for (size_t k = 1; k <= 40; k += 9) {
34983 GemmMicrokernelTester()
34984 .mr(1)
34985 .nr(8)
34986 .kr(1)
34987 .sr(1)
34988 .m(1)
34989 .n(n)
34990 .k(k)
34991 .cn_stride(11)
34992 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
34993 }
34994 }
34995 }
34996
34997 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_subtile) {
34998 TEST_REQUIRES_ARM_NEON_FMA;
34999 for (uint32_t n = 16; n <= 24; n += 8) {
35000 for (size_t k = 1; k <= 40; k += 9) {
35001 for (uint32_t m = 1; m <= 1; m++) {
35002 GemmMicrokernelTester()
35003 .mr(1)
35004 .nr(8)
35005 .kr(1)
35006 .sr(1)
35007 .m(m)
35008 .n(n)
35009 .k(k)
35010 .iterations(1)
35011 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
35012 }
35013 }
35014 }
35015 }
35016
35017 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel) {
35018 TEST_REQUIRES_ARM_NEON_FMA;
35019 for (size_t k = 1; k <= 40; k += 9) {
35020 GemmMicrokernelTester()
35021 .mr(1)
35022 .nr(8)
35023 .kr(1)
35024 .sr(1)
35025 .m(1)
35026 .n(8)
35027 .k(k)
35028 .ks(3)
35029 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
35030 }
35031 }
35032
35033 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, small_kernel_subtile) {
35034 TEST_REQUIRES_ARM_NEON_FMA;
35035 for (size_t k = 1; k <= 40; k += 9) {
35036 for (uint32_t n = 1; n <= 8; n++) {
35037 for (uint32_t m = 1; m <= 1; m++) {
35038 GemmMicrokernelTester()
35039 .mr(1)
35040 .nr(8)
35041 .kr(1)
35042 .sr(1)
35043 .m(m)
35044 .n(n)
35045 .k(k)
35046 .ks(3)
35047 .iterations(1)
35048 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
35049 }
35050 }
35051 }
35052 }
35053
35054 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_gt_8_small_kernel) {
35055 TEST_REQUIRES_ARM_NEON_FMA;
35056 for (uint32_t n = 9; n < 16; n++) {
35057 for (size_t k = 1; k <= 40; k += 9) {
35058 GemmMicrokernelTester()
35059 .mr(1)
35060 .nr(8)
35061 .kr(1)
35062 .sr(1)
35063 .m(1)
35064 .n(n)
35065 .k(k)
35066 .ks(3)
35067 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
35068 }
35069 }
35070 }
35071
35072 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, n_div_8_small_kernel) {
35073 TEST_REQUIRES_ARM_NEON_FMA;
35074 for (uint32_t n = 16; n <= 24; n += 8) {
35075 for (size_t k = 1; k <= 40; k += 9) {
35076 GemmMicrokernelTester()
35077 .mr(1)
35078 .nr(8)
35079 .kr(1)
35080 .sr(1)
35081 .m(1)
35082 .n(n)
35083 .k(k)
35084 .ks(3)
35085 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
35086 }
35087 }
35088 }
35089
35090 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm_subtile) {
35091 TEST_REQUIRES_ARM_NEON_FMA;
35092 for (size_t k = 1; k <= 40; k += 9) {
35093 for (uint32_t n = 1; n <= 8; n++) {
35094 for (uint32_t m = 1; m <= 1; m++) {
35095 GemmMicrokernelTester()
35096 .mr(1)
35097 .nr(8)
35098 .kr(1)
35099 .sr(1)
35100 .m(m)
35101 .n(n)
35102 .k(k)
35103 .cm_stride(11)
35104 .iterations(1)
35105 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
35106 }
35107 }
35108 }
35109 }
35110
35111 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, a_offset) {
35112 TEST_REQUIRES_ARM_NEON_FMA;
35113 for (size_t k = 1; k <= 40; k += 9) {
35114 GemmMicrokernelTester()
35115 .mr(1)
35116 .nr(8)
35117 .kr(1)
35118 .sr(1)
35119 .m(1)
35120 .n(8)
35121 .k(k)
35122 .ks(3)
35123 .a_offset(43)
35124 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
35125 }
35126 }
35127
35128 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, zero) {
35129 TEST_REQUIRES_ARM_NEON_FMA;
35130 for (size_t k = 1; k <= 40; k += 9) {
35131 for (uint32_t mz = 0; mz < 1; mz++) {
35132 GemmMicrokernelTester()
35133 .mr(1)
35134 .nr(8)
35135 .kr(1)
35136 .sr(1)
35137 .m(1)
35138 .n(8)
35139 .k(k)
35140 .ks(3)
35141 .a_offset(43)
35142 .zero_index(mz)
35143 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
35144 }
35145 }
35146 }
35147
35148 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, qmin) {
35149 TEST_REQUIRES_ARM_NEON_FMA;
35150 GemmMicrokernelTester()
35151 .mr(1)
35152 .nr(8)
35153 .kr(1)
35154 .sr(1)
35155 .m(1)
35156 .n(8)
35157 .k(8)
35158 .qmin(128)
35159 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
35160 }
35161
35162 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, qmax) {
35163 TEST_REQUIRES_ARM_NEON_FMA;
35164 GemmMicrokernelTester()
35165 .mr(1)
35166 .nr(8)
35167 .kr(1)
35168 .sr(1)
35169 .m(1)
35170 .n(8)
35171 .k(8)
35172 .qmax(128)
35173 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
35174 }
35175
35176 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_CORTEX_A75, strided_cm) {
35177 TEST_REQUIRES_ARM_NEON_FMA;
35178 GemmMicrokernelTester()
35179 .mr(1)
35180 .nr(8)
35181 .kr(1)
35182 .sr(1)
35183 .m(1)
35184 .n(8)
35185 .k(8)
35186 .cm_stride(11)
35187 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_cortex_a75, xnn_init_f32_minmax_scalar_params);
35188 }
35189#endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
35190
35191
35192#if XNN_ARCH_ARM64 && XNN_PLATFORM_JIT
35193 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8) {
35194 TEST_REQUIRES_ARM_NEON_FMA;
35195 GemmMicrokernelTester()
35196 .mr(1)
35197 .nr(8)
35198 .kr(1)
35199 .sr(1)
35200 .m(1)
35201 .n(8)
35202 .k(8)
35203 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35204 }
35205
35206 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cn) {
35207 TEST_REQUIRES_ARM_NEON_FMA;
35208 GemmMicrokernelTester()
35209 .mr(1)
35210 .nr(8)
35211 .kr(1)
35212 .sr(1)
35213 .m(1)
35214 .n(8)
35215 .k(8)
35216 .cn_stride(11)
35217 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35218 }
35219
35220 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile) {
35221 TEST_REQUIRES_ARM_NEON_FMA;
35222 for (uint32_t n = 1; n <= 8; n++) {
35223 for (uint32_t m = 1; m <= 1; m++) {
35224 GemmMicrokernelTester()
35225 .mr(1)
35226 .nr(8)
35227 .kr(1)
35228 .sr(1)
35229 .m(m)
35230 .n(n)
35231 .k(8)
35232 .iterations(1)
35233 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35234 }
35235 }
35236 }
35237
35238 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
35239 TEST_REQUIRES_ARM_NEON_FMA;
35240 for (uint32_t m = 1; m <= 1; m++) {
35241 GemmMicrokernelTester()
35242 .mr(1)
35243 .nr(8)
35244 .kr(1)
35245 .sr(1)
35246 .m(m)
35247 .n(8)
35248 .k(8)
35249 .iterations(1)
35250 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35251 }
35252 }
35253
35254 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
35255 TEST_REQUIRES_ARM_NEON_FMA;
35256 for (uint32_t n = 1; n <= 8; n++) {
35257 GemmMicrokernelTester()
35258 .mr(1)
35259 .nr(8)
35260 .kr(1)
35261 .sr(1)
35262 .m(1)
35263 .n(n)
35264 .k(8)
35265 .iterations(1)
35266 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35267 }
35268 }
35269
35270 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16) {
35271 TEST_REQUIRES_ARM_NEON_FMA;
35272 GemmMicrokernelTester()
35273 .mr(1)
35274 .nr(8)
35275 .kr(1)
35276 .sr(1)
35277 .m(1)
35278 .n(8)
35279 .k(16)
35280 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35281 }
35282
35283 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_eq_16_subtile) {
35284 TEST_REQUIRES_ARM_NEON_FMA;
35285 for (uint32_t n = 1; n <= 8; n++) {
35286 for (uint32_t m = 1; m <= 1; m++) {
35287 GemmMicrokernelTester()
35288 .mr(1)
35289 .nr(8)
35290 .kr(1)
35291 .sr(1)
35292 .m(m)
35293 .n(n)
35294 .k(16)
35295 .iterations(1)
35296 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35297 }
35298 }
35299 }
35300
35301 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16) {
35302 TEST_REQUIRES_ARM_NEON_FMA;
35303 for (size_t k = 1; k < 16; k++) {
35304 GemmMicrokernelTester()
35305 .mr(1)
35306 .nr(8)
35307 .kr(1)
35308 .sr(1)
35309 .m(1)
35310 .n(8)
35311 .k(k)
35312 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35313 }
35314 }
35315
35316 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_lt_16_subtile) {
35317 TEST_REQUIRES_ARM_NEON_FMA;
35318 for (size_t k = 1; k < 16; k++) {
35319 for (uint32_t n = 1; n <= 8; n++) {
35320 for (uint32_t m = 1; m <= 1; m++) {
35321 GemmMicrokernelTester()
35322 .mr(1)
35323 .nr(8)
35324 .kr(1)
35325 .sr(1)
35326 .m(m)
35327 .n(n)
35328 .k(k)
35329 .iterations(1)
35330 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35331 }
35332 }
35333 }
35334 }
35335
35336 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16) {
35337 TEST_REQUIRES_ARM_NEON_FMA;
35338 for (size_t k = 17; k < 32; k++) {
35339 GemmMicrokernelTester()
35340 .mr(1)
35341 .nr(8)
35342 .kr(1)
35343 .sr(1)
35344 .m(1)
35345 .n(8)
35346 .k(k)
35347 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35348 }
35349 }
35350
35351 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_gt_16_subtile) {
35352 TEST_REQUIRES_ARM_NEON_FMA;
35353 for (size_t k = 17; k < 32; k++) {
35354 for (uint32_t n = 1; n <= 8; n++) {
35355 for (uint32_t m = 1; m <= 1; m++) {
35356 GemmMicrokernelTester()
35357 .mr(1)
35358 .nr(8)
35359 .kr(1)
35360 .sr(1)
35361 .m(m)
35362 .n(n)
35363 .k(k)
35364 .iterations(1)
35365 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35366 }
35367 }
35368 }
35369 }
35370
35371 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8) {
35372 TEST_REQUIRES_ARM_NEON_FMA;
35373 for (size_t k = 24; k <= 80; k += 8) {
35374 GemmMicrokernelTester()
35375 .mr(1)
35376 .nr(8)
35377 .kr(1)
35378 .sr(1)
35379 .m(1)
35380 .n(8)
35381 .k(k)
35382 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35383 }
35384 }
35385
35386 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, k_div_8_subtile) {
35387 TEST_REQUIRES_ARM_NEON_FMA;
35388 for (size_t k = 24; k <= 80; k += 8) {
35389 for (uint32_t n = 1; n <= 8; n++) {
35390 for (uint32_t m = 1; m <= 1; m++) {
35391 GemmMicrokernelTester()
35392 .mr(1)
35393 .nr(8)
35394 .kr(1)
35395 .sr(1)
35396 .m(m)
35397 .n(n)
35398 .k(k)
35399 .iterations(1)
35400 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35401 }
35402 }
35403 }
35404 }
35405
35406 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8) {
35407 TEST_REQUIRES_ARM_NEON_FMA;
35408 for (uint32_t n = 9; n < 16; n++) {
35409 for (size_t k = 1; k <= 40; k += 9) {
35410 GemmMicrokernelTester()
35411 .mr(1)
35412 .nr(8)
35413 .kr(1)
35414 .sr(1)
35415 .m(1)
35416 .n(n)
35417 .k(k)
35418 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35419 }
35420 }
35421 }
35422
35423 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_strided_cn) {
35424 TEST_REQUIRES_ARM_NEON_FMA;
35425 for (uint32_t n = 9; n < 16; n++) {
35426 for (size_t k = 1; k <= 40; k += 9) {
35427 GemmMicrokernelTester()
35428 .mr(1)
35429 .nr(8)
35430 .kr(1)
35431 .sr(1)
35432 .m(1)
35433 .n(n)
35434 .k(k)
35435 .cn_stride(11)
35436 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35437 }
35438 }
35439 }
35440
35441 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_subtile) {
35442 TEST_REQUIRES_ARM_NEON_FMA;
35443 for (uint32_t n = 9; n < 16; n++) {
35444 for (size_t k = 1; k <= 40; k += 9) {
35445 for (uint32_t m = 1; m <= 1; m++) {
35446 GemmMicrokernelTester()
35447 .mr(1)
35448 .nr(8)
35449 .kr(1)
35450 .sr(1)
35451 .m(m)
35452 .n(n)
35453 .k(k)
35454 .iterations(1)
35455 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35456 }
35457 }
35458 }
35459 }
35460
35461 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8) {
35462 TEST_REQUIRES_ARM_NEON_FMA;
35463 for (uint32_t n = 16; n <= 24; n += 8) {
35464 for (size_t k = 1; k <= 40; k += 9) {
35465 GemmMicrokernelTester()
35466 .mr(1)
35467 .nr(8)
35468 .kr(1)
35469 .sr(1)
35470 .m(1)
35471 .n(n)
35472 .k(k)
35473 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35474 }
35475 }
35476 }
35477
35478 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_strided_cn) {
35479 TEST_REQUIRES_ARM_NEON_FMA;
35480 for (uint32_t n = 16; n <= 24; n += 8) {
35481 for (size_t k = 1; k <= 40; k += 9) {
35482 GemmMicrokernelTester()
35483 .mr(1)
35484 .nr(8)
35485 .kr(1)
35486 .sr(1)
35487 .m(1)
35488 .n(n)
35489 .k(k)
35490 .cn_stride(11)
35491 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35492 }
35493 }
35494 }
35495
35496 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_subtile) {
35497 TEST_REQUIRES_ARM_NEON_FMA;
35498 for (uint32_t n = 16; n <= 24; n += 8) {
35499 for (size_t k = 1; k <= 40; k += 9) {
35500 for (uint32_t m = 1; m <= 1; m++) {
35501 GemmMicrokernelTester()
35502 .mr(1)
35503 .nr(8)
35504 .kr(1)
35505 .sr(1)
35506 .m(m)
35507 .n(n)
35508 .k(k)
35509 .iterations(1)
35510 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35511 }
35512 }
35513 }
35514 }
35515
35516 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel) {
35517 TEST_REQUIRES_ARM_NEON_FMA;
35518 for (size_t k = 1; k <= 40; k += 9) {
35519 GemmMicrokernelTester()
35520 .mr(1)
35521 .nr(8)
35522 .kr(1)
35523 .sr(1)
35524 .m(1)
35525 .n(8)
35526 .k(k)
35527 .ks(3)
35528 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35529 }
35530 }
35531
35532 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, small_kernel_subtile) {
35533 TEST_REQUIRES_ARM_NEON_FMA;
35534 for (size_t k = 1; k <= 40; k += 9) {
35535 for (uint32_t n = 1; n <= 8; n++) {
35536 for (uint32_t m = 1; m <= 1; m++) {
35537 GemmMicrokernelTester()
35538 .mr(1)
35539 .nr(8)
35540 .kr(1)
35541 .sr(1)
35542 .m(m)
35543 .n(n)
35544 .k(k)
35545 .ks(3)
35546 .iterations(1)
35547 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35548 }
35549 }
35550 }
35551 }
35552
35553 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_gt_8_small_kernel) {
35554 TEST_REQUIRES_ARM_NEON_FMA;
35555 for (uint32_t n = 9; n < 16; n++) {
35556 for (size_t k = 1; k <= 40; k += 9) {
35557 GemmMicrokernelTester()
35558 .mr(1)
35559 .nr(8)
35560 .kr(1)
35561 .sr(1)
35562 .m(1)
35563 .n(n)
35564 .k(k)
35565 .ks(3)
35566 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35567 }
35568 }
35569 }
35570
35571 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, n_div_8_small_kernel) {
35572 TEST_REQUIRES_ARM_NEON_FMA;
35573 for (uint32_t n = 16; n <= 24; n += 8) {
35574 for (size_t k = 1; k <= 40; k += 9) {
35575 GemmMicrokernelTester()
35576 .mr(1)
35577 .nr(8)
35578 .kr(1)
35579 .sr(1)
35580 .m(1)
35581 .n(n)
35582 .k(k)
35583 .ks(3)
35584 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35585 }
35586 }
35587 }
35588
35589 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm_subtile) {
35590 TEST_REQUIRES_ARM_NEON_FMA;
35591 for (size_t k = 1; k <= 40; k += 9) {
35592 for (uint32_t n = 1; n <= 8; n++) {
35593 for (uint32_t m = 1; m <= 1; m++) {
35594 GemmMicrokernelTester()
35595 .mr(1)
35596 .nr(8)
35597 .kr(1)
35598 .sr(1)
35599 .m(m)
35600 .n(n)
35601 .k(k)
35602 .cm_stride(11)
35603 .iterations(1)
35604 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35605 }
35606 }
35607 }
35608 }
35609
35610 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, a_offset) {
35611 TEST_REQUIRES_ARM_NEON_FMA;
35612 for (size_t k = 1; k <= 40; k += 9) {
35613 GemmMicrokernelTester()
35614 .mr(1)
35615 .nr(8)
35616 .kr(1)
35617 .sr(1)
35618 .m(1)
35619 .n(8)
35620 .k(k)
35621 .ks(3)
35622 .a_offset(43)
35623 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35624 }
35625 }
35626
35627 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, zero) {
35628 TEST_REQUIRES_ARM_NEON_FMA;
35629 for (size_t k = 1; k <= 40; k += 9) {
35630 for (uint32_t mz = 0; mz < 1; mz++) {
35631 GemmMicrokernelTester()
35632 .mr(1)
35633 .nr(8)
35634 .kr(1)
35635 .sr(1)
35636 .m(1)
35637 .n(8)
35638 .k(k)
35639 .ks(3)
35640 .a_offset(43)
35641 .zero_index(mz)
35642 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35643 }
35644 }
35645 }
35646
35647 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmin) {
35648 TEST_REQUIRES_ARM_NEON_FMA;
35649 GemmMicrokernelTester()
35650 .mr(1)
35651 .nr(8)
35652 .kr(1)
35653 .sr(1)
35654 .m(1)
35655 .n(8)
35656 .k(8)
35657 .qmin(128)
35658 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35659 }
35660
35661 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, qmax) {
35662 TEST_REQUIRES_ARM_NEON_FMA;
35663 GemmMicrokernelTester()
35664 .mr(1)
35665 .nr(8)
35666 .kr(1)
35667 .sr(1)
35668 .m(1)
35669 .n(8)
35670 .k(8)
35671 .qmax(128)
35672 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35673 }
35674
35675 TEST(GENERATE_F32_IGEMM_1X8__AARCH64_NEONFMA_PRFM_CORTEX_A75, strided_cm) {
35676 TEST_REQUIRES_ARM_NEON_FMA;
35677 GemmMicrokernelTester()
35678 .mr(1)
35679 .nr(8)
35680 .kr(1)
35681 .sr(1)
35682 .m(1)
35683 .n(8)
35684 .k(8)
35685 .cm_stride(11)
35686 .Test(xnn_generate_f32_igemm_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75, xnn_init_f32_minmax_scalar_params);
35687 }
35688#endif // XNN_ARCH_ARM64 && XNN_PLATFORM_JIT